Index: llvm/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2157,6 +2157,37 @@ return false; // Assume to be zero-cost. } + auto getIntImmCostInst = [&TTI](const SCEV *User, const SCEV *S, unsigned Idx, + int &BudgetRemaining) { + if (!isa(S)) + return false; + + const APInt &Imm = cast(S)->getAPInt(); + Type *Ty = S->getType(); + switch (User->getSCEVType()) { + default: + return false; + case scAddExpr: + BudgetRemaining -= TTI.getIntImmCostInst(Instruction::Add, Idx, Imm, Ty); + break; + case scMulExpr: + BudgetRemaining -= TTI.getIntImmCostInst(Instruction::Mul, Idx, Imm, Ty); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: + BudgetRemaining -= TTI.getIntImmCostInst(Instruction::ICmp, Idx, Imm, Ty); + BudgetRemaining -= + TTI.getIntImmCostInst(Instruction::Select, Idx + 1, Imm, Ty); + break; + case scUDivExpr: + BudgetRemaining -= TTI.getIntImmCostInst(Instruction::UDiv, Idx, Imm, Ty); + break; + } + return true; + }; + if (auto *CastExpr = dyn_cast(S)) { unsigned Opcode; switch (S->getSCEVType()) { @@ -2208,7 +2239,10 @@ // Need to count the cost of this UDiv. BudgetRemaining -= TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType()); - Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()}); + if (!getIntImmCostInst(UDivExpr, UDivExpr->getLHS(), 0, BudgetRemaining)) + Worklist.emplace_back(UDivExpr->getLHS()); + if (!getIntImmCostInst(UDivExpr, UDivExpr->getRHS(), 1, BudgetRemaining)) + Worklist.emplace_back(UDivExpr->getRHS()); return false; // Will answer upon next entry into this function. } @@ -2262,8 +2296,12 @@ return true; // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); + unsigned Idx = 0; + for (auto *Op : NAry->operands()) { + if (!getIntImmCostInst(NAry, Op, Idx, BudgetRemaining)) + Worklist.emplace_back(Op); + ++Idx; + } return false; // So far so good, though ops may be too costly? } @@ -2303,8 +2341,12 @@ return true; // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); + unsigned Idx = 0; + for (auto *Op : NAry->operands()) { + if (!getIntImmCostInst(NAry, Op, Idx, BudgetRemaining)) + Worklist.emplace_back(Op); + ++Idx; + } return false; // So far so good, though ops may be too costly? } Index: llvm/test/CodeGen/ARM/indvar-cost.ll =================================================================== --- llvm/test/CodeGen/ARM/indvar-cost.ll +++ llvm/test/CodeGen/ARM/indvar-cost.ll @@ -13,9 +13,6 @@ ; CHECK-T1-NEXT: [[CMP41080:%.*]] = icmp eq i32 [[SUB]], 0 ; CHECK-T1-NEXT: br i1 [[CMP41080]], label [[WHILE_END13:%.*]], label [[WHILE_COND5_PREHEADER_PREHEADER:%.*]] ; CHECK-T1: while.cond5.preheader.preheader: -; CHECK-T1-NEXT: [[TMP0:%.*]] = add i32 [[SRCALEN_SRCBLEN]], -2 -; CHECK-T1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 2 -; CHECK-T1-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 2 ; CHECK-T1-NEXT: br label [[WHILE_COND5_PREHEADER:%.*]] ; CHECK-T1: while.cond5.preheader: ; CHECK-T1-NEXT: [[COUNT_01084:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_END:%.*]] ], [ 1, [[WHILE_COND5_PREHEADER_PREHEADER]] ] @@ -29,11 +26,11 @@ ; CHECK-T1-NEXT: [[PY_11076:%.*]] = phi i16* [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY7]] ], [ [[PY_01082]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T1-NEXT: [[PX_11075:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY7]] ], [ [[PSRCB_PSRCA]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T1-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PX_11075]], i32 1 -; CHECK-T1-NEXT: [[TMP2:%.*]] = load i16, i16* [[PX_11075]], align 2 -; CHECK-T1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +; CHECK-T1-NEXT: [[TMP0:%.*]] = load i16, i16* [[PX_11075]], align 2 +; CHECK-T1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-T1-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i16, i16* [[PY_11076]], i32 -1 -; CHECK-T1-NEXT: [[TMP3:%.*]] = load i16, i16* [[PY_11076]], align 2 -; CHECK-T1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP3]] to i32 +; CHECK-T1-NEXT: [[TMP1:%.*]] = load i16, i16* [[PY_11076]], align 2 +; CHECK-T1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-T1-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[CONV9]], [[CONV]] ; CHECK-T1-NEXT: [[SHR3_I:%.*]] = ashr i32 [[CONV]], 16 ; CHECK-T1-NEXT: [[SHR4_I:%.*]] = ashr i32 [[CONV9]], 16 @@ -45,8 +42,8 @@ ; CHECK-T1-NEXT: br i1 [[CMP6]], label [[WHILE_END]], label [[WHILE_BODY7]] ; CHECK-T1: while.end: ; CHECK-T1-NEXT: [[ADD6_I_LCSSA:%.*]] = phi i32 [ [[ADD6_I]], [[WHILE_BODY7]] ] -; CHECK-T1-NEXT: [[TMP4:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 -; CHECK-T1-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP4]] to i16 +; CHECK-T1-NEXT: [[TMP2:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 +; CHECK-T1-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-T1-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i16, i16* [[POUT_01081]], i32 1 ; CHECK-T1-NEXT: store i16 [[CONV10]], i16* [[POUT_01081]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[COUNT_01084]] @@ -54,19 +51,19 @@ ; CHECK-T1-NEXT: [[DEC12]] = add i32 [[BLOCKSIZE1_01083]], -1 ; CHECK-T1-NEXT: [[CMP3:%.*]] = icmp ult i32 [[COUNT_01084]], 3 ; CHECK-T1-NEXT: [[CMP4:%.*]] = icmp ne i32 [[DEC12]], 0 -; CHECK-T1-NEXT: [[TMP5:%.*]] = and i1 [[CMP4]], [[CMP3]] -; CHECK-T1-NEXT: br i1 [[TMP5]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] +; CHECK-T1-NEXT: [[TMP3:%.*]] = and i1 [[CMP4]], [[CMP3]] +; CHECK-T1-NEXT: br i1 [[TMP3]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] ; CHECK-T1: while.end13.loopexit: ; CHECK-T1-NEXT: [[INCDEC_PTR11_LCSSA:%.*]] = phi i16* [ [[INCDEC_PTR11]], [[WHILE_END]] ] ; CHECK-T1-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[WHILE_END]] ] +; CHECK-T1-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_END]] ] ; CHECK-T1-NEXT: [[DEC12_LCSSA:%.*]] = phi i32 [ [[DEC12]], [[WHILE_END]] ] -; CHECK-T1-NEXT: [[TMP6:%.*]] = add nuw nsw i32 [[UMIN]], 2 ; CHECK-T1-NEXT: br label [[WHILE_END13]] ; CHECK-T1: while.end13: ; CHECK-T1-NEXT: [[POUT_0_LCSSA:%.*]] = phi i16* [ [[PDST]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR11_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T1-NEXT: [[PY_0_LCSSA:%.*]] = phi i16* [ [[PSRCA_PSRCB]], [[ENTRY]] ], [ [[ADD_PTR_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T1-NEXT: [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[DEC12_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] -; CHECK-T1-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP6]], [[WHILE_END13_LOOPEXIT]] ] +; CHECK-T1-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T1-NEXT: [[CMP161068:%.*]] = icmp eq i32 [[BLOCKSIZE1_0_LCSSA]], 0 ; CHECK-T1-NEXT: br i1 [[CMP161068]], label [[EXIT:%.*]], label [[WHILE_BODY18_PREHEADER:%.*]] ; CHECK-T1: while.body18.preheader: @@ -88,34 +85,34 @@ ; CHECK-T1-NEXT: [[PY_31056:%.*]] = phi i16* [ [[ADD_PTR_I884:%.*]], [[WHILE_BODY23]] ], [ [[PY_21070]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T1-NEXT: [[PX_31055:%.*]] = phi i16* [ [[ADD_PTR_I890:%.*]], [[WHILE_BODY23]] ], [ [[PSRCB_PSRCA]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T1-NEXT: [[ARRAYIDX_I907:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 1 -; CHECK-T1-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 -; CHECK-T1-NEXT: [[TMP8:%.*]] = load i16, i16* [[PX_31055]], align 2 +; CHECK-T1-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 +; CHECK-T1-NEXT: [[TMP5:%.*]] = load i16, i16* [[PX_31055]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I912:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 2 ; CHECK-T1-NEXT: [[ARRAYIDX_I901:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 1 -; CHECK-T1-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 -; CHECK-T1-NEXT: [[TMP10:%.*]] = load i16, i16* [[PY_31056]], align 2 +; CHECK-T1-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 +; CHECK-T1-NEXT: [[TMP7:%.*]] = load i16, i16* [[PY_31056]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I906:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -2 -; CHECK-T1-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-T1-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-T1-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP5]] to i32 +; CHECK-T1-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-T1-NEXT: [[MUL_I894:%.*]] = mul nsw i32 [[SHR1_I893]], [[SHR_I892]] -; CHECK-T1-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP7]] to i32 -; CHECK-T1-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-T1-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP4]] to i32 +; CHECK-T1-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP7]] to i32 ; CHECK-T1-NEXT: [[MUL5_I898:%.*]] = mul nsw i32 [[SHR4_I897]], [[SHR2_I895]] ; CHECK-T1-NEXT: [[ADD_I899:%.*]] = add i32 [[MUL_I894]], [[SUM_11057]] ; CHECK-T1-NEXT: [[ADD6_I900:%.*]] = add i32 [[ADD_I899]], [[MUL5_I898]] ; CHECK-T1-NEXT: [[ARRAYIDX_I885:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 3 -; CHECK-T1-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 -; CHECK-T1-NEXT: [[TMP12:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 +; CHECK-T1-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 +; CHECK-T1-NEXT: [[TMP9:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I890]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 4 ; CHECK-T1-NEXT: [[ARRAYIDX_I879:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -1 -; CHECK-T1-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 -; CHECK-T1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 +; CHECK-T1-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 +; CHECK-T1-NEXT: [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I884]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -4 -; CHECK-T1-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP12]] to i32 -; CHECK-T1-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-T1-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-T1-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP10]] to i32 ; CHECK-T1-NEXT: [[MUL_I872:%.*]] = mul nsw i32 [[SHR1_I871]], [[SHR_I870]] -; CHECK-T1-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP11]] to i32 -; CHECK-T1-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-T1-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP8]] to i32 +; CHECK-T1-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP11]] to i32 ; CHECK-T1-NEXT: [[MUL5_I876:%.*]] = mul nsw i32 [[SHR4_I875]], [[SHR2_I873]] ; CHECK-T1-NEXT: [[ADD_I877:%.*]] = add i32 [[ADD6_I900]], [[MUL_I872]] ; CHECK-T1-NEXT: [[ADD6_I878]] = add i32 [[ADD_I877]], [[MUL5_I876]] @@ -143,11 +140,11 @@ ; CHECK-T1-NEXT: [[PY_41064:%.*]] = phi i16* [ [[INCDEC_PTR39:%.*]], [[WHILE_BODY36]] ], [ [[ADD_PTR32]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T1-NEXT: [[PX_41063:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[WHILE_BODY36]] ], [ [[PX_3_LCSSA]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T1-NEXT: [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PX_41063]], i32 1 -; CHECK-T1-NEXT: [[TMP15:%.*]] = load i16, i16* [[PX_41063]], align 2 -; CHECK-T1-NEXT: [[CONV38:%.*]] = sext i16 [[TMP15]] to i32 +; CHECK-T1-NEXT: [[TMP12:%.*]] = load i16, i16* [[PX_41063]], align 2 +; CHECK-T1-NEXT: [[CONV38:%.*]] = sext i16 [[TMP12]] to i32 ; CHECK-T1-NEXT: [[INCDEC_PTR39]] = getelementptr inbounds i16, i16* [[PY_41064]], i32 -1 -; CHECK-T1-NEXT: [[TMP16:%.*]] = load i16, i16* [[PY_41064]], align 2 -; CHECK-T1-NEXT: [[CONV40:%.*]] = sext i16 [[TMP16]] to i32 +; CHECK-T1-NEXT: [[TMP13:%.*]] = load i16, i16* [[PY_41064]], align 2 +; CHECK-T1-NEXT: [[CONV40:%.*]] = sext i16 [[TMP13]] to i32 ; CHECK-T1-NEXT: [[MUL_I863:%.*]] = mul nsw i32 [[CONV40]], [[CONV38]] ; CHECK-T1-NEXT: [[SHR3_I864:%.*]] = ashr i32 [[CONV38]], 16 ; CHECK-T1-NEXT: [[SHR4_I865:%.*]] = ashr i32 [[CONV40]], 16 @@ -162,8 +159,8 @@ ; CHECK-T1-NEXT: br label [[WHILE_END43]] ; CHECK-T1: while.end43: ; CHECK-T1-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_1_LCSSA]], [[WHILE_END31]] ], [ [[ADD6_I868_LCSSA]], [[WHILE_END43_LOOPEXIT]] ] -; CHECK-T1-NEXT: [[TMP17:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 -; CHECK-T1-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP17]] to i16 +; CHECK-T1-NEXT: [[TMP14:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 +; CHECK-T1-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP14]] to i16 ; CHECK-T1-NEXT: [[INCDEC_PTR46]] = getelementptr inbounds i16, i16* [[POUT_11069]], i32 1 ; CHECK-T1-NEXT: store i16 [[CONV45]], i16* [[POUT_11069]], align 2 ; CHECK-T1-NEXT: [[SUB47:%.*]] = add i32 [[COUNT_11072]], -1 @@ -187,9 +184,6 @@ ; CHECK-T2-NEXT: [[CMP41080:%.*]] = icmp eq i32 [[SUB]], 0 ; CHECK-T2-NEXT: br i1 [[CMP41080]], label [[WHILE_END13:%.*]], label [[WHILE_COND5_PREHEADER_PREHEADER:%.*]] ; CHECK-T2: while.cond5.preheader.preheader: -; CHECK-T2-NEXT: [[TMP0:%.*]] = add i32 [[SRCALEN_SRCBLEN]], -2 -; CHECK-T2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 2 -; CHECK-T2-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 2 ; CHECK-T2-NEXT: br label [[WHILE_COND5_PREHEADER:%.*]] ; CHECK-T2: while.cond5.preheader: ; CHECK-T2-NEXT: [[COUNT_01084:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_END:%.*]] ], [ 1, [[WHILE_COND5_PREHEADER_PREHEADER]] ] @@ -203,11 +197,11 @@ ; CHECK-T2-NEXT: [[PY_11076:%.*]] = phi i16* [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY7]] ], [ [[PY_01082]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T2-NEXT: [[PX_11075:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY7]] ], [ [[PSRCB_PSRCA]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PX_11075]], i32 1 -; CHECK-T2-NEXT: [[TMP2:%.*]] = load i16, i16* [[PX_11075]], align 2 -; CHECK-T2-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +; CHECK-T2-NEXT: [[TMP0:%.*]] = load i16, i16* [[PX_11075]], align 2 +; CHECK-T2-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-T2-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i16, i16* [[PY_11076]], i32 -1 -; CHECK-T2-NEXT: [[TMP3:%.*]] = load i16, i16* [[PY_11076]], align 2 -; CHECK-T2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP3]] to i32 +; CHECK-T2-NEXT: [[TMP1:%.*]] = load i16, i16* [[PY_11076]], align 2 +; CHECK-T2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-T2-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[CONV9]], [[CONV]] ; CHECK-T2-NEXT: [[SHR3_I:%.*]] = ashr i32 [[CONV]], 16 ; CHECK-T2-NEXT: [[SHR4_I:%.*]] = ashr i32 [[CONV9]], 16 @@ -219,8 +213,8 @@ ; CHECK-T2-NEXT: br i1 [[CMP6]], label [[WHILE_END]], label [[WHILE_BODY7]] ; CHECK-T2: while.end: ; CHECK-T2-NEXT: [[ADD6_I_LCSSA:%.*]] = phi i32 [ [[ADD6_I]], [[WHILE_BODY7]] ] -; CHECK-T2-NEXT: [[TMP4:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 -; CHECK-T2-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP4]] to i16 +; CHECK-T2-NEXT: [[TMP2:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 +; CHECK-T2-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-T2-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i16, i16* [[POUT_01081]], i32 1 ; CHECK-T2-NEXT: store i16 [[CONV10]], i16* [[POUT_01081]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[COUNT_01084]] @@ -228,19 +222,19 @@ ; CHECK-T2-NEXT: [[DEC12]] = add i32 [[BLOCKSIZE1_01083]], -1 ; CHECK-T2-NEXT: [[CMP3:%.*]] = icmp ult i32 [[COUNT_01084]], 3 ; CHECK-T2-NEXT: [[CMP4:%.*]] = icmp ne i32 [[DEC12]], 0 -; CHECK-T2-NEXT: [[TMP5:%.*]] = and i1 [[CMP4]], [[CMP3]] -; CHECK-T2-NEXT: br i1 [[TMP5]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] +; CHECK-T2-NEXT: [[TMP3:%.*]] = and i1 [[CMP4]], [[CMP3]] +; CHECK-T2-NEXT: br i1 [[TMP3]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] ; CHECK-T2: while.end13.loopexit: ; CHECK-T2-NEXT: [[INCDEC_PTR11_LCSSA:%.*]] = phi i16* [ [[INCDEC_PTR11]], [[WHILE_END]] ] ; CHECK-T2-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[WHILE_END]] ] +; CHECK-T2-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_END]] ] ; CHECK-T2-NEXT: [[DEC12_LCSSA:%.*]] = phi i32 [ [[DEC12]], [[WHILE_END]] ] -; CHECK-T2-NEXT: [[TMP6:%.*]] = add nuw nsw i32 [[UMIN]], 2 ; CHECK-T2-NEXT: br label [[WHILE_END13]] ; CHECK-T2: while.end13: ; CHECK-T2-NEXT: [[POUT_0_LCSSA:%.*]] = phi i16* [ [[PDST]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR11_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T2-NEXT: [[PY_0_LCSSA:%.*]] = phi i16* [ [[PSRCA_PSRCB]], [[ENTRY]] ], [ [[ADD_PTR_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T2-NEXT: [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[DEC12_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] -; CHECK-T2-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP6]], [[WHILE_END13_LOOPEXIT]] ] +; CHECK-T2-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T2-NEXT: [[CMP161068:%.*]] = icmp eq i32 [[BLOCKSIZE1_0_LCSSA]], 0 ; CHECK-T2-NEXT: br i1 [[CMP161068]], label [[EXIT:%.*]], label [[WHILE_BODY18_PREHEADER:%.*]] ; CHECK-T2: while.body18.preheader: @@ -262,34 +256,34 @@ ; CHECK-T2-NEXT: [[PY_31056:%.*]] = phi i16* [ [[ADD_PTR_I884:%.*]], [[WHILE_BODY23]] ], [ [[PY_21070]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T2-NEXT: [[PX_31055:%.*]] = phi i16* [ [[ADD_PTR_I890:%.*]], [[WHILE_BODY23]] ], [ [[PSRCB_PSRCA]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T2-NEXT: [[ARRAYIDX_I907:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 1 -; CHECK-T2-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 -; CHECK-T2-NEXT: [[TMP8:%.*]] = load i16, i16* [[PX_31055]], align 2 +; CHECK-T2-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 +; CHECK-T2-NEXT: [[TMP5:%.*]] = load i16, i16* [[PX_31055]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I912:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 2 ; CHECK-T2-NEXT: [[ARRAYIDX_I901:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 1 -; CHECK-T2-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 -; CHECK-T2-NEXT: [[TMP10:%.*]] = load i16, i16* [[PY_31056]], align 2 +; CHECK-T2-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 +; CHECK-T2-NEXT: [[TMP7:%.*]] = load i16, i16* [[PY_31056]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I906:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -2 -; CHECK-T2-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-T2-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-T2-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP5]] to i32 +; CHECK-T2-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-T2-NEXT: [[MUL_I894:%.*]] = mul nsw i32 [[SHR1_I893]], [[SHR_I892]] -; CHECK-T2-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP7]] to i32 -; CHECK-T2-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-T2-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP4]] to i32 +; CHECK-T2-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP7]] to i32 ; CHECK-T2-NEXT: [[MUL5_I898:%.*]] = mul nsw i32 [[SHR4_I897]], [[SHR2_I895]] ; CHECK-T2-NEXT: [[ADD_I899:%.*]] = add i32 [[MUL_I894]], [[SUM_11057]] ; CHECK-T2-NEXT: [[ADD6_I900:%.*]] = add i32 [[ADD_I899]], [[MUL5_I898]] ; CHECK-T2-NEXT: [[ARRAYIDX_I885:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 3 -; CHECK-T2-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 -; CHECK-T2-NEXT: [[TMP12:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 +; CHECK-T2-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 +; CHECK-T2-NEXT: [[TMP9:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I890]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 4 ; CHECK-T2-NEXT: [[ARRAYIDX_I879:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -1 -; CHECK-T2-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 -; CHECK-T2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 +; CHECK-T2-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 +; CHECK-T2-NEXT: [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I884]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -4 -; CHECK-T2-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP12]] to i32 -; CHECK-T2-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-T2-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-T2-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP10]] to i32 ; CHECK-T2-NEXT: [[MUL_I872:%.*]] = mul nsw i32 [[SHR1_I871]], [[SHR_I870]] -; CHECK-T2-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP11]] to i32 -; CHECK-T2-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-T2-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP8]] to i32 +; CHECK-T2-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP11]] to i32 ; CHECK-T2-NEXT: [[MUL5_I876:%.*]] = mul nsw i32 [[SHR4_I875]], [[SHR2_I873]] ; CHECK-T2-NEXT: [[ADD_I877:%.*]] = add i32 [[ADD6_I900]], [[MUL_I872]] ; CHECK-T2-NEXT: [[ADD6_I878]] = add i32 [[ADD_I877]], [[MUL5_I876]] @@ -317,11 +311,11 @@ ; CHECK-T2-NEXT: [[PY_41064:%.*]] = phi i16* [ [[INCDEC_PTR39:%.*]], [[WHILE_BODY36]] ], [ [[ADD_PTR32]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T2-NEXT: [[PX_41063:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[WHILE_BODY36]] ], [ [[PX_3_LCSSA]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T2-NEXT: [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PX_41063]], i32 1 -; CHECK-T2-NEXT: [[TMP15:%.*]] = load i16, i16* [[PX_41063]], align 2 -; CHECK-T2-NEXT: [[CONV38:%.*]] = sext i16 [[TMP15]] to i32 +; CHECK-T2-NEXT: [[TMP12:%.*]] = load i16, i16* [[PX_41063]], align 2 +; CHECK-T2-NEXT: [[CONV38:%.*]] = sext i16 [[TMP12]] to i32 ; CHECK-T2-NEXT: [[INCDEC_PTR39]] = getelementptr inbounds i16, i16* [[PY_41064]], i32 -1 -; CHECK-T2-NEXT: [[TMP16:%.*]] = load i16, i16* [[PY_41064]], align 2 -; CHECK-T2-NEXT: [[CONV40:%.*]] = sext i16 [[TMP16]] to i32 +; CHECK-T2-NEXT: [[TMP13:%.*]] = load i16, i16* [[PY_41064]], align 2 +; CHECK-T2-NEXT: [[CONV40:%.*]] = sext i16 [[TMP13]] to i32 ; CHECK-T2-NEXT: [[MUL_I863:%.*]] = mul nsw i32 [[CONV40]], [[CONV38]] ; CHECK-T2-NEXT: [[SHR3_I864:%.*]] = ashr i32 [[CONV38]], 16 ; CHECK-T2-NEXT: [[SHR4_I865:%.*]] = ashr i32 [[CONV40]], 16 @@ -336,8 +330,8 @@ ; CHECK-T2-NEXT: br label [[WHILE_END43]] ; CHECK-T2: while.end43: ; CHECK-T2-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_1_LCSSA]], [[WHILE_END31]] ], [ [[ADD6_I868_LCSSA]], [[WHILE_END43_LOOPEXIT]] ] -; CHECK-T2-NEXT: [[TMP17:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 -; CHECK-T2-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP17]] to i16 +; CHECK-T2-NEXT: [[TMP14:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 +; CHECK-T2-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP14]] to i16 ; CHECK-T2-NEXT: [[INCDEC_PTR46]] = getelementptr inbounds i16, i16* [[POUT_11069]], i32 1 ; CHECK-T2-NEXT: store i16 [[CONV45]], i16* [[POUT_11069]], align 2 ; CHECK-T2-NEXT: [[SUB47:%.*]] = add i32 [[COUNT_11072]], -1 Index: llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll =================================================================== --- llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll +++ llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll @@ -18,258 +18,90 @@ ; CHECK-NEXT: [[PSRCA_ADDR_090:%.*]] = phi i16* [ [[PSRCA_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCA:%.*]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[PSRCB_ADDR_089:%.*]] = phi i16* [ [[PSRCB_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCB:%.*]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[I_092]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[I_092]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 2147483644 -; CHECK-NEXT: [[CMP272:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483644 +; CHECK-NEXT: [[CMP272:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMP272]], label [[FOR_END:%.*]], label [[FOR_BODY3_PREHEADER:%.*]] ; CHECK: for.body3.preheader: -; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP3]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 3 -; CHECK-NEXT: br i1 [[TMP7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY3_PREHEADER_NEW:%.*]] -; CHECK: for.body3.preheader.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[TMP3]], [[XTRAITER]] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: [[J_076:%.*]] = phi i32 [ 0, [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD24_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PDEST_ADDR_175:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[INCDEC_PTR_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCA_ADDR_174:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD_PTR_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCB_ADDR_173:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD_PTR23_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[PSRCA_ADDR_174]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[PSRCB_ADDR_173]], align 2 -; CHECK-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-NEXT: [[J_076:%.*]] = phi i32 [ [[ADD24:%.*]], [[FOR_BODY3]] ], [ 0, [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[PDEST_ADDR_175:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY3]] ], [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[PSRCA_ADDR_174:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[FOR_BODY3]] ], [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[PSRCB_ADDR_173:%.*]] = phi i16* [ [[ADD_PTR23:%.*]], [[FOR_BODY3]] ], [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[PSRCA_ADDR_174]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[PSRCB_ADDR_173]], align 2 +; CHECK-NEXT: [[CONV5:%.*]] = sext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV5]], [[CONV]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 -; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2 +; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP5]] to i32 ; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2 -; CHECK-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2 +; CHECK-NEXT: [[CONV9:%.*]] = sext i16 [[TMP6]] to i32 ; CHECK-NEXT: [[MUL10:%.*]] = mul nsw i32 [[CONV9]], [[CONV7]] ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 2 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX11]], align 2 -; CHECK-NEXT: [[CONV12:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX11]], align 2 +; CHECK-NEXT: [[CONV12:%.*]] = sext i16 [[TMP7]] to i32 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2 -; CHECK-NEXT: [[CONV14:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2 +; CHECK-NEXT: [[CONV14:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-NEXT: [[MUL15:%.*]] = mul nsw i32 [[CONV14]], [[CONV12]] ; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX17]], align 2 -; CHECK-NEXT: [[CONV18:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX17]], align 2 +; CHECK-NEXT: [[CONV18:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[ADD21:%.*]] = add i32 [[MUL10]], [[MUL]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ADD21]], [[CONV14]] ; CHECK-NEXT: [[ADD16:%.*]] = add i32 [[ADD]], [[MUL15]] ; CHECK-NEXT: [[ADD22:%.*]] = add i32 [[ADD16]], [[CONV18]] ; CHECK-NEXT: store i32 [[ADD22]], i32* [[PDEST_ADDR_175]], align 4 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 4 -; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175]], i32 1 -; CHECK-NEXT: [[ADD24:%.*]] = add nuw nsw i32 [[J_076]], 4 -; CHECK-NEXT: [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[ADD_PTR]], align 2 -; CHECK-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP15]] to i32 -; CHECK-NEXT: [[TMP16:%.*]] = load i16, i16* [[ADD_PTR23]], align 2 -; CHECK-NEXT: [[CONV5_1:%.*]] = sext i16 [[TMP16]] to i32 -; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[CONV5_1]], [[CONV_1]] -; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX6_1]], align 2 -; CHECK-NEXT: [[CONV7_1:%.*]] = sext i16 [[TMP17]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 1 -; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX8_1]], align 2 -; CHECK-NEXT: [[CONV9_1:%.*]] = sext i16 [[TMP18]] to i32 -; CHECK-NEXT: [[MUL10_1:%.*]] = mul nsw i32 [[CONV9_1]], [[CONV7_1]] -; CHECK-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 2 -; CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX11_1]], align 2 -; CHECK-NEXT: [[CONV12_1:%.*]] = sext i16 [[TMP19]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 3 -; CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX13_1]], align 2 -; CHECK-NEXT: [[CONV14_1:%.*]] = sext i16 [[TMP20]] to i32 -; CHECK-NEXT: [[MUL15_1:%.*]] = mul nsw i32 [[CONV14_1]], [[CONV12_1]] -; CHECK-NEXT: [[ARRAYIDX17_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 3 -; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX17_1]], align 2 -; CHECK-NEXT: [[CONV18_1:%.*]] = sext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[ADD21_1:%.*]] = add i32 [[MUL10_1]], [[MUL_1]] -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD21_1]], [[CONV14_1]] -; CHECK-NEXT: [[ADD16_1:%.*]] = add i32 [[ADD_1]], [[MUL15_1]] -; CHECK-NEXT: [[ADD22_1:%.*]] = add i32 [[ADD16_1]], [[CONV18_1]] -; CHECK-NEXT: store i32 [[ADD22_1]], i32* [[INCDEC_PTR]], align 4 -; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR]], i32 1 -; CHECK-NEXT: [[ADD24_1:%.*]] = add nuw nsw i32 [[ADD24]], 4 -; CHECK-NEXT: [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2 -; CHECK-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP22]] to i32 -; CHECK-NEXT: [[TMP23:%.*]] = load i16, i16* [[ADD_PTR23_1]], align 2 -; CHECK-NEXT: [[CONV5_2:%.*]] = sext i16 [[TMP23]] to i32 -; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[CONV5_2]], [[CONV_2]] -; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 1 -; CHECK-NEXT: [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX6_2]], align 2 -; CHECK-NEXT: [[CONV7_2:%.*]] = sext i16 [[TMP24]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 1 -; CHECK-NEXT: [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX8_2]], align 2 -; CHECK-NEXT: [[CONV9_2:%.*]] = sext i16 [[TMP25]] to i32 -; CHECK-NEXT: [[MUL10_2:%.*]] = mul nsw i32 [[CONV9_2]], [[CONV7_2]] -; CHECK-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 2 -; CHECK-NEXT: [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX11_2]], align 2 -; CHECK-NEXT: [[CONV12_2:%.*]] = sext i16 [[TMP26]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 3 -; CHECK-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX13_2]], align 2 -; CHECK-NEXT: [[CONV14_2:%.*]] = sext i16 [[TMP27]] to i32 -; CHECK-NEXT: [[MUL15_2:%.*]] = mul nsw i32 [[CONV14_2]], [[CONV12_2]] -; CHECK-NEXT: [[ARRAYIDX17_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 3 -; CHECK-NEXT: [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX17_2]], align 2 -; CHECK-NEXT: [[CONV18_2:%.*]] = sext i16 [[TMP28]] to i32 -; CHECK-NEXT: [[ADD21_2:%.*]] = add i32 [[MUL10_2]], [[MUL_2]] -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD21_2]], [[CONV14_2]] -; CHECK-NEXT: [[ADD16_2:%.*]] = add i32 [[ADD_2]], [[MUL15_2]] -; CHECK-NEXT: [[ADD22_2:%.*]] = add i32 [[ADD16_2]], [[CONV18_2]] -; CHECK-NEXT: store i32 [[ADD22_2]], i32* [[INCDEC_PTR_1]], align 4 -; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_2:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_1]], i32 1 -; CHECK-NEXT: [[ADD24_2:%.*]] = add nuw nsw i32 [[ADD24_1]], 4 -; CHECK-NEXT: [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2 -; CHECK-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP29]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = load i16, i16* [[ADD_PTR23_2]], align 2 -; CHECK-NEXT: [[CONV5_3:%.*]] = sext i16 [[TMP30]] to i32 -; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[CONV5_3]], [[CONV_3]] -; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 1 -; CHECK-NEXT: [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX6_3]], align 2 -; CHECK-NEXT: [[CONV7_3:%.*]] = sext i16 [[TMP31]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 1 -; CHECK-NEXT: [[TMP32:%.*]] = load i16, i16* [[ARRAYIDX8_3]], align 2 -; CHECK-NEXT: [[CONV9_3:%.*]] = sext i16 [[TMP32]] to i32 -; CHECK-NEXT: [[MUL10_3:%.*]] = mul nsw i32 [[CONV9_3]], [[CONV7_3]] -; CHECK-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 2 -; CHECK-NEXT: [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX11_3]], align 2 -; CHECK-NEXT: [[CONV12_3:%.*]] = sext i16 [[TMP33]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 3 -; CHECK-NEXT: [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX13_3]], align 2 -; CHECK-NEXT: [[CONV14_3:%.*]] = sext i16 [[TMP34]] to i32 -; CHECK-NEXT: [[MUL15_3:%.*]] = mul nsw i32 [[CONV14_3]], [[CONV12_3]] -; CHECK-NEXT: [[ARRAYIDX17_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 3 -; CHECK-NEXT: [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX17_3]], align 2 -; CHECK-NEXT: [[CONV18_3:%.*]] = sext i16 [[TMP35]] to i32 -; CHECK-NEXT: [[ADD21_3:%.*]] = add i32 [[MUL10_3]], [[MUL_3]] -; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD21_3]], [[CONV14_3]] -; CHECK-NEXT: [[ADD16_3:%.*]] = add i32 [[ADD_3]], [[MUL15_3]] -; CHECK-NEXT: [[ADD22_3:%.*]] = add i32 [[ADD16_3]], [[CONV18_3]] -; CHECK-NEXT: store i32 [[ADD22_3]], i32* [[INCDEC_PTR_2]], align 4 -; CHECK-NEXT: [[ADD_PTR_3]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_3]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_3]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_2]], i32 1 -; CHECK-NEXT: [[ADD24_3]] = add nuw nsw i32 [[ADD24_2]], 4 -; CHECK-NEXT: [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1 -; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp ne i32 [[NITER_NSUB_3]], 0 -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_BODY3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] -; CHECK: for.end.loopexit.unr-lcssa.loopexit: -; CHECK-NEXT: [[ADD_PTR_LCSSA_PH_PH:%.*]] = phi i16* [ [[ADD_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA_PH_PH:%.*]] = phi i16* [ [[ADD_PTR23_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA_PH_PH:%.*]] = phi i32* [ [[INCDEC_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[J_076_UNR_PH:%.*]] = phi i32 [ [[ADD24_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PDEST_ADDR_175_UNR_PH:%.*]] = phi i32* [ [[INCDEC_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCA_ADDR_174_UNR_PH:%.*]] = phi i16* [ [[ADD_PTR_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[PSRCB_ADDR_173_UNR_PH:%.*]] = phi i16* [ [[ADD_PTR23_3]], [[FOR_BODY3]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] -; CHECK: for.end.loopexit.unr-lcssa: -; CHECK-NEXT: [[ADD_PTR_LCSSA_PH:%.*]] = phi i16* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA_PH:%.*]] = phi i16* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR23_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA_PH:%.*]] = phi i32* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[INCDEC_PTR_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[J_076_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY3_PREHEADER]] ], [ [[J_076_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PDEST_ADDR_175_UNR:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER]] ], [ [[PDEST_ADDR_175_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PSRCA_ADDR_174_UNR:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER]] ], [ [[PSRCA_ADDR_174_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PSRCB_ADDR_173_UNR:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER]] ], [ [[PSRCB_ADDR_173_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY3_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.body3.epil.preheader: -; CHECK-NEXT: br label [[FOR_BODY3_EPIL:%.*]] -; CHECK: for.body3.epil: -; CHECK-NEXT: [[TMP36:%.*]] = load i16, i16* [[PSRCA_ADDR_174_UNR]], align 2 -; CHECK-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP36]] to i32 -; CHECK-NEXT: [[TMP37:%.*]] = load i16, i16* [[PSRCB_ADDR_173_UNR]], align 2 -; CHECK-NEXT: [[CONV5_EPIL:%.*]] = sext i16 [[TMP37]] to i32 -; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV5_EPIL]], [[CONV_EPIL]] -; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 1 -; CHECK-NEXT: [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL]], align 2 -; CHECK-NEXT: [[CONV7_EPIL:%.*]] = sext i16 [[TMP38]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 1 -; CHECK-NEXT: [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL]], align 2 -; CHECK-NEXT: [[CONV9_EPIL:%.*]] = sext i16 [[TMP39]] to i32 -; CHECK-NEXT: [[MUL10_EPIL:%.*]] = mul nsw i32 [[CONV9_EPIL]], [[CONV7_EPIL]] -; CHECK-NEXT: [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 2 -; CHECK-NEXT: [[TMP40:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL]], align 2 -; CHECK-NEXT: [[CONV12_EPIL:%.*]] = sext i16 [[TMP40]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 3 -; CHECK-NEXT: [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL]], align 2 -; CHECK-NEXT: [[CONV14_EPIL:%.*]] = sext i16 [[TMP41]] to i32 -; CHECK-NEXT: [[MUL15_EPIL:%.*]] = mul nsw i32 [[CONV14_EPIL]], [[CONV12_EPIL]] -; CHECK-NEXT: [[ARRAYIDX17_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 3 -; CHECK-NEXT: [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL]], align 2 -; CHECK-NEXT: [[CONV18_EPIL:%.*]] = sext i16 [[TMP42]] to i32 -; CHECK-NEXT: [[ADD21_EPIL:%.*]] = add i32 [[MUL10_EPIL]], [[MUL_EPIL]] -; CHECK-NEXT: [[ADD_EPIL:%.*]] = add i32 [[ADD21_EPIL]], [[CONV14_EPIL]] -; CHECK-NEXT: [[ADD16_EPIL:%.*]] = add i32 [[ADD_EPIL]], [[MUL15_EPIL]] -; CHECK-NEXT: [[ADD22_EPIL:%.*]] = add i32 [[ADD16_EPIL]], [[CONV18_EPIL]] -; CHECK-NEXT: store i32 [[ADD22_EPIL]], i32* [[PDEST_ADDR_175_UNR]], align 4 -; CHECK-NEXT: [[ADD_PTR_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_EPIL:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175_UNR]], i32 1 -; CHECK-NEXT: [[ADD24_EPIL:%.*]] = add nuw nsw i32 [[J_076_UNR]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1 -; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY3_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: for.end.loopexit.epilog-lcssa: -; CHECK-NEXT: [[ADD_PTR_LCSSA_PH1:%.*]] = phi i16* [ [[ADD_PTR_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[ADD_PTR_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[ADD_PTR_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2:%.*]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA_PH2:%.*]] = phi i16* [ [[ADD_PTR23_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[ADD_PTR23_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[ADD_PTR23_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA_PH3:%.*]] = phi i32* [ [[INCDEC_PTR_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[INCDEC_PTR_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[INCDEC_PTR_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2]] ] -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 4 +; CHECK-NEXT: [[ADD_PTR23]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175]], i32 1 +; CHECK-NEXT: [[ADD24]] = add nuw nsw i32 [[J_076]], 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[ADD24]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_PTR_LCSSA_PH1]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ] -; CHECK-NEXT: [[ADD_PTR23_LCSSA:%.*]] = phi i16* [ [[ADD_PTR23_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_PTR23_LCSSA_PH2]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ] -; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i32* [ [[INCDEC_PTR_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[INCDEC_PTR_LCSSA_PH3]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ] +; CHECK-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[FOR_BODY3]] ] +; CHECK-NEXT: [[ADD_PTR23_LCSSA:%.*]] = phi i16* [ [[ADD_PTR23]], [[FOR_BODY3]] ] +; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i32* [ [[INCDEC_PTR]], [[FOR_BODY3]] ] ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[PSRCB_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY]] ], [ [[ADD_PTR23_LCSSA]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: [[PSRCA_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY]] ], [ [[ADD_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: [[PDEST_ADDR_1_LCSSA:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY]] ], [ [[INCDEC_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: [[J_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[TMP6]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: [[REM:%.*]] = and i32 [[TMP4]], 3 +; CHECK-NEXT: [[J_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[TMP2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[REM:%.*]] = and i32 [[TMP0]], 3 ; CHECK-NEXT: [[ADD25:%.*]] = or i32 [[J_0_LCSSA]], [[REM]] ; CHECK-NEXT: [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]] ; CHECK-NEXT: br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]] ; CHECK: for.body29.preheader: -; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP44:%.*]] = sub i32 [[ADD25]], [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[ADD25]], -1 -; CHECK-NEXT: [[TMP46:%.*]] = sub i32 [[TMP45]], [[J_0_LCSSA]] -; CHECK-NEXT: [[XTRAITER4:%.*]] = and i32 [[TMP44]], 3 -; CHECK-NEXT: [[LCMP_MOD5:%.*]] = icmp ne i32 [[XTRAITER4]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD5]], label [[FOR_BODY29_PROL_PREHEADER:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]] +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[ADD25]], [[J_0_LCSSA]] +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[ADD25]], -1 +; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], [[J_0_LCSSA]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP11]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY29_PROL_PREHEADER:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT:%.*]] ; CHECK: for.body29.prol.preheader: ; CHECK-NEXT: br label [[FOR_BODY29_PROL:%.*]] ; CHECK: for.body29.prol: ; CHECK-NEXT: [[ARRAYIDX30_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX30_PROL]], align 2 -; CHECK-NEXT: [[CONV31_PROL:%.*]] = sext i16 [[TMP47]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX30_PROL]], align 2 +; CHECK-NEXT: [[CONV31_PROL:%.*]] = sext i16 [[TMP14]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[J_0_LCSSA]] -; CHECK-NEXT: [[TMP48:%.*]] = load i16, i16* [[ARRAYIDX32_PROL]], align 2 -; CHECK-NEXT: [[CONV33_PROL:%.*]] = sext i16 [[TMP48]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX32_PROL]], align 2 +; CHECK-NEXT: [[CONV33_PROL:%.*]] = sext i16 [[TMP15]] to i32 ; CHECK-NEXT: [[MUL34_PROL:%.*]] = mul nsw i32 [[CONV33_PROL]], [[CONV31_PROL]] -; CHECK-NEXT: [[TMP49:%.*]] = load i32, i32* [[PDEST_ADDR_1_LCSSA]], align 4 -; CHECK-NEXT: [[ADD35_PROL:%.*]] = add nsw i32 [[MUL34_PROL]], [[TMP49]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[PDEST_ADDR_1_LCSSA]], align 4 +; CHECK-NEXT: [[ADD35_PROL:%.*]] = add nsw i32 [[MUL34_PROL]], [[TMP16]] ; CHECK-NEXT: store i32 [[ADD35_PROL]], i32* [[PDEST_ADDR_1_LCSSA]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR38_PROL:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 1 ; CHECK-NEXT: [[INC_PROL:%.*]] = add nuw i32 [[J_0_LCSSA]], 1 -; CHECK-NEXT: [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER4]], 1 +; CHECK-NEXT: [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 ; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY29_PROL_1:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA:%.*]] ; CHECK: for.body29.prol.loopexit.unr-lcssa: @@ -283,8 +115,8 @@ ; CHECK-NEXT: [[PDEST_ADDR_283_UNR:%.*]] = phi i32* [ [[PDEST_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PDEST_ADDR_283_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[PSRCA_ADDR_282_UNR:%.*]] = phi i16* [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PSRCA_ADDR_282_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[PSRCB_ADDR_281_UNR:%.*]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PSRCB_ADDR_281_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP50:%.*]] = icmp ult i32 [[TMP46]], 3 -; CHECK-NEXT: br i1 [[TMP50]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29_PREHEADER_NEW:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i32 [[TMP13]], 3 +; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29_PREHEADER_NEW:%.*]] ; CHECK: for.body29.preheader.new: ; CHECK-NEXT: br label [[FOR_BODY29:%.*]] ; CHECK: for.body29: @@ -293,56 +125,56 @@ ; CHECK-NEXT: [[PSRCA_ADDR_282:%.*]] = phi i16* [ [[PSRCA_ADDR_282_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR36_3:%.*]], [[FOR_BODY29]] ] ; CHECK-NEXT: [[PSRCB_ADDR_281:%.*]] = phi i16* [ [[PSRCB_ADDR_281_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR37_3:%.*]], [[FOR_BODY29]] ] ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 [[J_184]] -; CHECK-NEXT: [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX30]], align 2 -; CHECK-NEXT: [[CONV31:%.*]] = sext i16 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX30]], align 2 +; CHECK-NEXT: [[CONV31:%.*]] = sext i16 [[TMP18]] to i32 ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 [[J_184]] -; CHECK-NEXT: [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2 -; CHECK-NEXT: [[CONV33:%.*]] = sext i16 [[TMP52]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2 +; CHECK-NEXT: [[CONV33:%.*]] = sext i16 [[TMP19]] to i32 ; CHECK-NEXT: [[MUL34:%.*]] = mul nsw i32 [[CONV33]], [[CONV31]] -; CHECK-NEXT: [[TMP53:%.*]] = load i32, i32* [[PDEST_ADDR_283]], align 4 -; CHECK-NEXT: [[ADD35:%.*]] = add nsw i32 [[MUL34]], [[TMP53]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[PDEST_ADDR_283]], align 4 +; CHECK-NEXT: [[ADD35:%.*]] = add nsw i32 [[MUL34]], [[TMP20]] ; CHECK-NEXT: store i32 [[ADD35]], i32* [[PDEST_ADDR_283]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR38:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_283]], i32 1 ; CHECK-NEXT: [[INC:%.*]] = add nuw i32 [[J_184]], 1 ; CHECK-NEXT: [[ARRAYIDX30_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36]], i32 [[INC]] -; CHECK-NEXT: [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX30_1]], align 2 -; CHECK-NEXT: [[CONV31_1:%.*]] = sext i16 [[TMP54]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX30_1]], align 2 +; CHECK-NEXT: [[CONV31_1:%.*]] = sext i16 [[TMP21]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37]], i32 [[INC]] -; CHECK-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX32_1]], align 2 -; CHECK-NEXT: [[CONV33_1:%.*]] = sext i16 [[TMP55]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX32_1]], align 2 +; CHECK-NEXT: [[CONV33_1:%.*]] = sext i16 [[TMP22]] to i32 ; CHECK-NEXT: [[MUL34_1:%.*]] = mul nsw i32 [[CONV33_1]], [[CONV31_1]] -; CHECK-NEXT: [[TMP56:%.*]] = load i32, i32* [[INCDEC_PTR38]], align 4 -; CHECK-NEXT: [[ADD35_1:%.*]] = add nsw i32 [[MUL34_1]], [[TMP56]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[INCDEC_PTR38]], align 4 +; CHECK-NEXT: [[ADD35_1:%.*]] = add nsw i32 [[MUL34_1]], [[TMP23]] ; CHECK-NEXT: store i32 [[ADD35_1]], i32* [[INCDEC_PTR38]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR38_1:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38]], i32 1 ; CHECK-NEXT: [[INC_1:%.*]] = add nuw i32 [[INC]], 1 ; CHECK-NEXT: [[ARRAYIDX30_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_1]], i32 [[INC_1]] -; CHECK-NEXT: [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX30_2]], align 2 -; CHECK-NEXT: [[CONV31_2:%.*]] = sext i16 [[TMP57]] to i32 +; CHECK-NEXT: [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX30_2]], align 2 +; CHECK-NEXT: [[CONV31_2:%.*]] = sext i16 [[TMP24]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_1]], i32 [[INC_1]] -; CHECK-NEXT: [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX32_2]], align 2 -; CHECK-NEXT: [[CONV33_2:%.*]] = sext i16 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX32_2]], align 2 +; CHECK-NEXT: [[CONV33_2:%.*]] = sext i16 [[TMP25]] to i32 ; CHECK-NEXT: [[MUL34_2:%.*]] = mul nsw i32 [[CONV33_2]], [[CONV31_2]] -; CHECK-NEXT: [[TMP59:%.*]] = load i32, i32* [[INCDEC_PTR38_1]], align 4 -; CHECK-NEXT: [[ADD35_2:%.*]] = add nsw i32 [[MUL34_2]], [[TMP59]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[INCDEC_PTR38_1]], align 4 +; CHECK-NEXT: [[ADD35_2:%.*]] = add nsw i32 [[MUL34_2]], [[TMP26]] ; CHECK-NEXT: store i32 [[ADD35_2]], i32* [[INCDEC_PTR38_1]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_1]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_1]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR38_2:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_1]], i32 1 ; CHECK-NEXT: [[INC_2:%.*]] = add nuw i32 [[INC_1]], 1 ; CHECK-NEXT: [[ARRAYIDX30_3:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_2]], i32 [[INC_2]] -; CHECK-NEXT: [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX30_3]], align 2 -; CHECK-NEXT: [[CONV31_3:%.*]] = sext i16 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX30_3]], align 2 +; CHECK-NEXT: [[CONV31_3:%.*]] = sext i16 [[TMP27]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_3:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_2]], i32 [[INC_2]] -; CHECK-NEXT: [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX32_3]], align 2 -; CHECK-NEXT: [[CONV33_3:%.*]] = sext i16 [[TMP61]] to i32 +; CHECK-NEXT: [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX32_3]], align 2 +; CHECK-NEXT: [[CONV33_3:%.*]] = sext i16 [[TMP28]] to i32 ; CHECK-NEXT: [[MUL34_3:%.*]] = mul nsw i32 [[CONV33_3]], [[CONV31_3]] -; CHECK-NEXT: [[TMP62:%.*]] = load i32, i32* [[INCDEC_PTR38_2]], align 4 -; CHECK-NEXT: [[ADD35_3:%.*]] = add nsw i32 [[MUL34_3]], [[TMP62]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[INCDEC_PTR38_2]], align 4 +; CHECK-NEXT: [[ADD35_3:%.*]] = add nsw i32 [[MUL34_3]], [[TMP29]] ; CHECK-NEXT: store i32 [[ADD35_3]], i32* [[INCDEC_PTR38_2]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36_3]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_2]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37_3]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_2]], i32 1 @@ -353,9 +185,9 @@ ; CHECK: for.end40.loopexit.unr-lcssa: ; CHECK-NEXT: br label [[FOR_END40_LOOPEXIT]] ; CHECK: for.end40.loopexit: -; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP43]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP43]] -; CHECK-NEXT: [[SCEVGEP94:%.*]] = getelementptr i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 [[TMP43]] +; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]] +; CHECK-NEXT: [[SCEVGEP94:%.*]] = getelementptr i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]] ; CHECK-NEXT: br label [[FOR_END40]] ; CHECK: for.end40: ; CHECK-NEXT: [[PSRCB_ADDR_2_LCSSA]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP93]], [[FOR_END40_LOOPEXIT]] ] @@ -364,85 +196,16 @@ ; CHECK-NEXT: [[INC42]] = add nuw i32 [[I_092]], 1 ; CHECK-NEXT: [[EXITCOND95:%.*]] = icmp eq i32 [[INC42]], [[BLKCNT]] ; CHECK-NEXT: br i1 [[EXITCOND95]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.body3.epil.1: -; CHECK-NEXT: [[TMP63:%.*]] = load i16, i16* [[ADD_PTR_EPIL]], align 2 -; CHECK-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP64:%.*]] = load i16, i16* [[ADD_PTR23_EPIL]], align 2 -; CHECK-NEXT: [[CONV5_EPIL_1:%.*]] = sext i16 [[TMP64]] to i32 -; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[CONV5_EPIL_1]], [[CONV_EPIL_1]] -; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 1 -; CHECK-NEXT: [[TMP65:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV7_EPIL_1:%.*]] = sext i16 [[TMP65]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 1 -; CHECK-NEXT: [[TMP66:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV9_EPIL_1:%.*]] = sext i16 [[TMP66]] to i32 -; CHECK-NEXT: [[MUL10_EPIL_1:%.*]] = mul nsw i32 [[CONV9_EPIL_1]], [[CONV7_EPIL_1]] -; CHECK-NEXT: [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 2 -; CHECK-NEXT: [[TMP67:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV12_EPIL_1:%.*]] = sext i16 [[TMP67]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 3 -; CHECK-NEXT: [[TMP68:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV14_EPIL_1:%.*]] = sext i16 [[TMP68]] to i32 -; CHECK-NEXT: [[MUL15_EPIL_1:%.*]] = mul nsw i32 [[CONV14_EPIL_1]], [[CONV12_EPIL_1]] -; CHECK-NEXT: [[ARRAYIDX17_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 3 -; CHECK-NEXT: [[TMP69:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV18_EPIL_1:%.*]] = sext i16 [[TMP69]] to i32 -; CHECK-NEXT: [[ADD21_EPIL_1:%.*]] = add i32 [[MUL10_EPIL_1]], [[MUL_EPIL_1]] -; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add i32 [[ADD21_EPIL_1]], [[CONV14_EPIL_1]] -; CHECK-NEXT: [[ADD16_EPIL_1:%.*]] = add i32 [[ADD_EPIL_1]], [[MUL15_EPIL_1]] -; CHECK-NEXT: [[ADD22_EPIL_1:%.*]] = add i32 [[ADD16_EPIL_1]], [[CONV18_EPIL_1]] -; CHECK-NEXT: store i32 [[ADD22_EPIL_1]], i32* [[INCDEC_PTR_EPIL]], align 4 -; CHECK-NEXT: [[ADD_PTR_EPIL_1]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_EPIL_1]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_EPIL_1]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_EPIL]], i32 1 -; CHECK-NEXT: [[ADD24_EPIL_1:%.*]] = add nuw nsw i32 [[ADD24_EPIL]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1 -; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0 -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY3_EPIL_2]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] -; CHECK: for.body3.epil.2: -; CHECK-NEXT: [[TMP70:%.*]] = load i16, i16* [[ADD_PTR_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP71:%.*]] = load i16, i16* [[ADD_PTR23_EPIL_1]], align 2 -; CHECK-NEXT: [[CONV5_EPIL_2:%.*]] = sext i16 [[TMP71]] to i32 -; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[CONV5_EPIL_2]], [[CONV_EPIL_2]] -; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 1 -; CHECK-NEXT: [[TMP72:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV7_EPIL_2:%.*]] = sext i16 [[TMP72]] to i32 -; CHECK-NEXT: [[ARRAYIDX8_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 1 -; CHECK-NEXT: [[TMP73:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV9_EPIL_2:%.*]] = sext i16 [[TMP73]] to i32 -; CHECK-NEXT: [[MUL10_EPIL_2:%.*]] = mul nsw i32 [[CONV9_EPIL_2]], [[CONV7_EPIL_2]] -; CHECK-NEXT: [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 2 -; CHECK-NEXT: [[TMP74:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV12_EPIL_2:%.*]] = sext i16 [[TMP74]] to i32 -; CHECK-NEXT: [[ARRAYIDX13_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 3 -; CHECK-NEXT: [[TMP75:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV14_EPIL_2:%.*]] = sext i16 [[TMP75]] to i32 -; CHECK-NEXT: [[MUL15_EPIL_2:%.*]] = mul nsw i32 [[CONV14_EPIL_2]], [[CONV12_EPIL_2]] -; CHECK-NEXT: [[ARRAYIDX17_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 3 -; CHECK-NEXT: [[TMP76:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL_2]], align 2 -; CHECK-NEXT: [[CONV18_EPIL_2:%.*]] = sext i16 [[TMP76]] to i32 -; CHECK-NEXT: [[ADD21_EPIL_2:%.*]] = add i32 [[MUL10_EPIL_2]], [[MUL_EPIL_2]] -; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add i32 [[ADD21_EPIL_2]], [[CONV14_EPIL_2]] -; CHECK-NEXT: [[ADD16_EPIL_2:%.*]] = add i32 [[ADD_EPIL_2]], [[MUL15_EPIL_2]] -; CHECK-NEXT: [[ADD22_EPIL_2:%.*]] = add i32 [[ADD16_EPIL_2]], [[CONV18_EPIL_2]] -; CHECK-NEXT: store i32 [[ADD22_EPIL_2]], i32* [[INCDEC_PTR_EPIL_1]], align 4 -; CHECK-NEXT: [[ADD_PTR_EPIL_2]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 4 -; CHECK-NEXT: [[ADD_PTR23_EPIL_2]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 4 -; CHECK-NEXT: [[INCDEC_PTR_EPIL_2]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_EPIL_1]], i32 1 -; CHECK-NEXT: [[ADD24_EPIL_2:%.*]] = add nuw nsw i32 [[ADD24_EPIL_1]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1 -; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ; CHECK: for.body29.prol.1: ; CHECK-NEXT: [[ARRAYIDX30_PROL_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL]], i32 [[INC_PROL]] -; CHECK-NEXT: [[TMP77:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_1]], align 2 -; CHECK-NEXT: [[CONV31_PROL_1:%.*]] = sext i16 [[TMP77]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_1]], align 2 +; CHECK-NEXT: [[CONV31_PROL_1:%.*]] = sext i16 [[TMP30]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_PROL_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL]], i32 [[INC_PROL]] -; CHECK-NEXT: [[TMP78:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_1]], align 2 -; CHECK-NEXT: [[CONV33_PROL_1:%.*]] = sext i16 [[TMP78]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_1]], align 2 +; CHECK-NEXT: [[CONV33_PROL_1:%.*]] = sext i16 [[TMP31]] to i32 ; CHECK-NEXT: [[MUL34_PROL_1:%.*]] = mul nsw i32 [[CONV33_PROL_1]], [[CONV31_PROL_1]] -; CHECK-NEXT: [[TMP79:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL]], align 4 -; CHECK-NEXT: [[ADD35_PROL_1:%.*]] = add nsw i32 [[MUL34_PROL_1]], [[TMP79]] +; CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL]], align 4 +; CHECK-NEXT: [[ADD35_PROL_1:%.*]] = add nsw i32 [[MUL34_PROL_1]], [[TMP32]] ; CHECK-NEXT: store i32 [[ADD35_PROL_1]], i32* [[INCDEC_PTR38_PROL]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36_PROL_1]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37_PROL_1]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL]], i32 1 @@ -453,14 +216,14 @@ ; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY29_PROL_2]], label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ; CHECK: for.body29.prol.2: ; CHECK-NEXT: [[ARRAYIDX30_PROL_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL_1]], i32 [[INC_PROL_1]] -; CHECK-NEXT: [[TMP80:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_2]], align 2 -; CHECK-NEXT: [[CONV31_PROL_2:%.*]] = sext i16 [[TMP80]] to i32 +; CHECK-NEXT: [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_2]], align 2 +; CHECK-NEXT: [[CONV31_PROL_2:%.*]] = sext i16 [[TMP33]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_PROL_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL_1]], i32 [[INC_PROL_1]] -; CHECK-NEXT: [[TMP81:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_2]], align 2 -; CHECK-NEXT: [[CONV33_PROL_2:%.*]] = sext i16 [[TMP81]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_2]], align 2 +; CHECK-NEXT: [[CONV33_PROL_2:%.*]] = sext i16 [[TMP34]] to i32 ; CHECK-NEXT: [[MUL34_PROL_2:%.*]] = mul nsw i32 [[CONV33_PROL_2]], [[CONV31_PROL_2]] -; CHECK-NEXT: [[TMP82:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL_1]], align 4 -; CHECK-NEXT: [[ADD35_PROL_2:%.*]] = add nsw i32 [[MUL34_PROL_2]], [[TMP82]] +; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL_1]], align 4 +; CHECK-NEXT: [[ADD35_PROL_2:%.*]] = add nsw i32 [[MUL34_PROL_2]], [[TMP35]] ; CHECK-NEXT: store i32 [[ADD35_PROL_2]], i32* [[INCDEC_PTR38_PROL_1]], align 4 ; CHECK-NEXT: [[INCDEC_PTR36_PROL_2]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL_1]], i32 1 ; CHECK-NEXT: [[INCDEC_PTR37_PROL_2]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL_1]], i32 1 Index: llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -66,16 +66,13 @@ ; ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 2147483646 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 2147483646 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -119,16 +116,13 @@ ; ; CHECK-LABEL: @test_04( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], -2147483647 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 -2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 1 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483647, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -252,16 +246,13 @@ define void @test_02_unsigned(i32 %n) { ; CHECK-LABEL: @test_02_unsigned( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], -2 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 -2 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[UMAX]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -330,16 +321,13 @@ define void @test_05_unsigned(i32 %n) { ; CHECK-LABEL: @test_05_unsigned( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -470,17 +458,15 @@ define void @test_10(i32 %n) { ; CHECK-LABEL: @test_10( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 100 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMIN]], -99 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[TMP3]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[IV]], [[SEXT]] +; CHECK-NEXT: [[NEGCMP:%.*]] = icmp slt i64 [[IV]], -10 +; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[NEGCMP]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -551,15 +537,13 @@ ; CHECK-LABEL: @test_12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], !range !0 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV_NEXT]], [[ZEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll +++ llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll @@ -122,17 +122,15 @@ ; CHECK-NEXT: [[ENTRY_COND:%.*]] = icmp ne i32 [[LIM:%.*]], 0 ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[LIM]], 2 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[LIM]], i32 2 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LIM]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: call void @dummy.i64(i64 [[TMP1]]) -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE_LOOPEXIT:%.*]] +; CHECK-NEXT: [[BE_COND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LEAVE_LOOPEXIT:%.*]] ; CHECK: leave.loopexit: ; CHECK-NEXT: br label [[LEAVE]] ; CHECK: leave: @@ -167,9 +165,7 @@ ; CHECK-NEXT: [[BC0:%.*]] = bitcast i32* [[LINED:%.*]] to i8* ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[HSIZE:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[NSTEPS:%.*]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP2]], i32 [[NSTEPS]], i32 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT11:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[NSTEPS:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -204,8 +200,8 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1 -; CHECK-NEXT: [[EXITCOND12:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT8]], [[WIDE_TRIP_COUNT11]] -; CHECK-NEXT: br i1 [[EXITCOND12]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT8]], [[TMP2]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll +++ llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll @@ -311,15 +311,12 @@ define void @test3_neg(i64 %start) { ; CHECK-LABEL: @test3_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 -1 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -339,19 +336,16 @@ define void @test4_neg(i64 %start) { ; CHECK-LABEL: @test4_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25 ; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -99,8 +99,7 @@ ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32 ; CHECK-NEXT: br i1 undef, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[CONV]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[CONV]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[CONV]] to i64 ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.preheader: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] @@ -111,8 +110,8 @@ ; CHECK-NEXT: br label [[FOR_INC13_US]] ; CHECK: for.inc13.us: ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 4 -; CHECK-NEXT: br i1 [[EXITCOND4]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 4 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]] ; CHECK: for.body4.us: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[A:%.*]], i64 [[INDVARS_IV2]], i64 [[INDVARS_IV]] @@ -122,10 +121,9 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX8_US]], align 1 ; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX6_US]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT:%.*]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY4_US]], label [[FOR_INC13_US_LOOPEXIT:%.*]] +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_INC13_US_LOOPEXIT:%.*]] ; CHECK: for.body4.lr.ph.us: -; CHECK-NEXT: [[WIDE_TRIP_COUNT]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[FOR_BODY4_US]] ; CHECK: for.cond1.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_INC13:%.*]], label [[FOR_INC13]] @@ -307,20 +305,17 @@ define i32 @test6(i32* %a, i32 %b) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[B:%.*]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[B]], i32 -1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[INDVARS_IV]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: @@ -352,10 +347,7 @@ ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[B]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -364,11 +356,11 @@ ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i64 [[INDVARS_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0]], [[FOR_BODY]] ], [ [[SUM_0]], [[FOR_COND]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] @@ -457,15 +449,13 @@ ; CHECK-NEXT: br i1 [[E]], label [[FOR_COND_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: for.cond.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INIT]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[INIT]], [[B:%.*]] -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[INIT]], i32 [[B]] -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_COND_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 Index: llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll +++ llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll @@ -19,77 +19,30 @@ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] -; CHECK: vector.ph.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> , [[VEC_IND12]] -; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> , <16 x i8> -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[S:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP9]], align 1 -; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], -; CHECK-NEXT: [[NITER_NSUB:%.*]] = sub i64 [[NITER]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = shl <16 x i32> , [[VEC_IND_NEXT13]] -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i32> [[TMP10]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i32> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i8> , <16 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_NEXT]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP15]], align 1 -; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND_NEXT13]], -; CHECK-NEXT: [[NITER_NSUB_1]] = sub i64 [[NITER_NSUB]], 1 -; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NSUB_1]], 0 -; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]] -; CHECK: middle.block.unr-lcssa.loopexit: -; CHECK-NEXT: [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND12_UNR_PH:%.*]] = phi <16 x i32> [ [[VEC_IND_NEXT13_1]], [[VECTOR_BODY]] ] -; CHECK-NEXT: br label [[MIDDLE_BLOCK_UNR_LCSSA]] -; CHECK: middle.block.unr-lcssa: -; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]] -; CHECK: vector.body.epil.preheader: -; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] -; CHECK: vector.body.epil: -; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> , [[VEC_IND12_EPIL]] -; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> , <16 x i8> -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]] -; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1 -; CHECK-NEXT: [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]] -; CHECK-NEXT: br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]] -; CHECK: middle.block.epilog-lcssa: -; CHECK-NEXT: br label [[MIDDLE_BLOCK]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i32> , [[VEC_IND12]] +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[S:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>* +; CHECK-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* [[TMP5]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT13]] = add <16 x i32> [[VEC_IND12]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] -; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]] -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[INDVARS_IV_PH]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP7]], 7 ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]] ; CHECK: for.body.prol.preheader: @@ -97,8 +50,8 @@ ; CHECK: for.body.prol: ; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] ; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ] -; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 -; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 +; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP10]] ; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]] ; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0 ; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49 @@ -114,70 +67,70 @@ ; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]] ; CHECK: for.body.prol.loopexit: ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7 -; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP9]], 7 +; CHECK-NEXT: br i1 [[TMP11]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; CHECK: for.body.preheader.new: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP28]] +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP12]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP29]] +; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP13]] ; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]] ; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0 ; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 -; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP30]] +; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 +; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP14]] ; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]] ; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0 ; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 -; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP31]] +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 +; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP15]] ; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]] ; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0 ; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 -; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP32]] +; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 +; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP16]] ; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]] ; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0 ; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]] ; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 -; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP33]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 +; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP17]] ; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]] ; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0 ; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]] ; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 -; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP34]] +; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 +; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP18]] ; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]] ; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0 ; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]] ; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 -; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP35]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 +; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP19]] ; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]] ; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0 ; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49 Index: llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -28,119 +28,29 @@ ; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 -; AUTO_VEC-NEXT: [[TMP1:%.*]] = add nsw i64 [[N_VEC]], -32 -; AUTO_VEC-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 5 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 96 -; AUTO_VEC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] -; AUTO_VEC: vector.ph.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 1152921504606846972 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: -; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD5:%.*]] = fadd fast <8 x float> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD6:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; AUTO_VEC-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], <8 x float>* [[TMP2]], align 4 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <8 x float>* +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], <8 x float>* [[TMP4]], align 4 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 16 ; AUTO_VEC-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], <8 x float>* [[TMP6]], align 4 -; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 8 +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5]], <8 x float>* [[TMP6]], align 4 +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 24 ; AUTO_VEC-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], <8 x float>* [[TMP8]], align 4 -; AUTO_VEC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 16 -; AUTO_VEC-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5]], <8 x float>* [[TMP10]], align 4 -; AUTO_VEC-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 24 -; AUTO_VEC-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6]], <8 x float>* [[TMP12]], align 4 -; AUTO_VEC-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD5_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD6_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT]] -; AUTO_VEC-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_NEXT]], <8 x float>* [[TMP14]], align 4 -; AUTO_VEC-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 8 -; AUTO_VEC-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP15]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_1]], <8 x float>* [[TMP16]], align 4 -; AUTO_VEC-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 16 -; AUTO_VEC-NEXT: [[TMP18:%.*]] = bitcast float* [[TMP17]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_1]], <8 x float>* [[TMP18]], align 4 -; AUTO_VEC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 24 -; AUTO_VEC-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_1]], <8 x float>* [[TMP20]], align 4 -; AUTO_VEC-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 64 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_1:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD5_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD6_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_1]] -; AUTO_VEC-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP21]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_NEXT_1]], <8 x float>* [[TMP22]], align 4 -; AUTO_VEC-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 8 -; AUTO_VEC-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_2]], <8 x float>* [[TMP24]], align 4 -; AUTO_VEC-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 16 -; AUTO_VEC-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_2]], <8 x float>* [[TMP26]], align 4 -; AUTO_VEC-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 24 -; AUTO_VEC-NEXT: [[TMP28:%.*]] = bitcast float* [[TMP27]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_2]], <8 x float>* [[TMP28]], align 4 -; AUTO_VEC-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 96 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_2:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD5_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD6_3:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_2]] -; AUTO_VEC-NEXT: [[TMP30:%.*]] = bitcast float* [[TMP29]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_NEXT_2]], <8 x float>* [[TMP30]], align 4 -; AUTO_VEC-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 8 -; AUTO_VEC-NEXT: [[TMP32:%.*]] = bitcast float* [[TMP31]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_3]], <8 x float>* [[TMP32]], align 4 -; AUTO_VEC-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 16 -; AUTO_VEC-NEXT: [[TMP34:%.*]] = bitcast float* [[TMP33]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_3]], <8 x float>* [[TMP34]], align 4 -; AUTO_VEC-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 24 -; AUTO_VEC-NEXT: [[TMP36:%.*]] = bitcast float* [[TMP35]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_3]], <8 x float>* [[TMP36]], align 4 -; AUTO_VEC-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 128 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_3]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4 -; AUTO_VEC-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0 -; AUTO_VEC-NEXT: br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop !0 -; AUTO_VEC: middle.block.unr-lcssa: -; AUTO_VEC-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[VEC_IND_UNR:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_3]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; AUTO_VEC-NEXT: br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]] -; AUTO_VEC: vector.body.epil: -; AUTO_VEC-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] -; AUTO_VEC-NEXT: [[VEC_IND_EPIL:%.*]] = phi <8 x float> [ [[VEC_IND_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[VEC_IND_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] -; AUTO_VEC-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] -; AUTO_VEC-NEXT: [[STEP_ADD_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[STEP_ADD5_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[STEP_ADD6_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_EPIL]] -; AUTO_VEC-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND_EPIL]], <8 x float>* [[TMP38]], align 4 -; AUTO_VEC-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 8 -; AUTO_VEC-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP39]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD_EPIL]], <8 x float>* [[TMP40]], align 4 -; AUTO_VEC-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 16 -; AUTO_VEC-NEXT: [[TMP42:%.*]] = bitcast float* [[TMP41]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD5_EPIL]], <8 x float>* [[TMP42]], align 4 -; AUTO_VEC-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, float* [[TMP37]], i64 24 -; AUTO_VEC-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP43]] to <8 x float>* -; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6_EPIL]], <8 x float>* [[TMP44]], align 4 -; AUTO_VEC-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_EPIL]] = fadd fast <8 x float> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 -; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop !2 +; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD6]], <8 x float>* [[TMP8]], align 4 +; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; AUTO_VEC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; AUTO_VEC: middle.block: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[ZEXT]] ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] @@ -151,8 +61,8 @@ ; AUTO_VEC-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[CONV1]] = fadd float [[X_06]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; AUTO_VEC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] -; AUTO_VEC-NEXT: br i1 [[TMP45]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4 +; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] +; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2 ; AUTO_VEC: for.end: ; AUTO_VEC-NEXT: ret void ; @@ -259,7 +169,7 @@ ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1 ; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 ; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !6 +; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !4 ; AUTO_VEC: for.end: ; AUTO_VEC-NEXT: ret void ; @@ -299,124 +209,34 @@ ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 ; AUTO_VEC-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to double ; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul fast double [[CAST_CRD]], 3.000000e+00 -; AUTO_VEC-NEXT: [[TMP2:%.*]] = add nsw i64 [[N_VEC]], -16 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 4 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 3 -; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 48 -; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] -; AUTO_VEC: vector.ph.new: -; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], 2305843009213693948 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: -; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD5:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[STEP_ADD6:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] +; AUTO_VEC-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], <4 x double>* [[TMP3]], align 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, double* [[TMP2]], i64 4 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP4]] to <4 x double>* +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], <4 x double>* [[TMP5]], align 8 +; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, double* [[TMP2]], i64 8 ; AUTO_VEC-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], <4 x double>* [[TMP7]], align 8 -; AUTO_VEC-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP6]], i64 4 +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5]], <4 x double>* [[TMP7]], align 8 +; AUTO_VEC-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP2]], i64 12 ; AUTO_VEC-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], <4 x double>* [[TMP9]], align 8 -; AUTO_VEC-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[TMP6]], i64 8 -; AUTO_VEC-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5]], <4 x double>* [[TMP11]], align 8 -; AUTO_VEC-NEXT: [[TMP12:%.*]] = getelementptr double, double* [[TMP6]], i64 12 -; AUTO_VEC-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP12]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6]], <4 x double>* [[TMP13]], align 8 -; AUTO_VEC-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD5_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD6_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT]] -; AUTO_VEC-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT]], <4 x double>* [[TMP15]], align 8 -; AUTO_VEC-NEXT: [[TMP16:%.*]] = getelementptr double, double* [[TMP14]], i64 4 -; AUTO_VEC-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_1]], <4 x double>* [[TMP17]], align 8 -; AUTO_VEC-NEXT: [[TMP18:%.*]] = getelementptr double, double* [[TMP14]], i64 8 -; AUTO_VEC-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_1]], <4 x double>* [[TMP19]], align 8 -; AUTO_VEC-NEXT: [[TMP20:%.*]] = getelementptr double, double* [[TMP14]], i64 12 -; AUTO_VEC-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_1]], <4 x double>* [[TMP21]], align 8 -; AUTO_VEC-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_1:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD5_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD6_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP22:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_1]] -; AUTO_VEC-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_1]], <4 x double>* [[TMP23]], align 8 -; AUTO_VEC-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[TMP22]], i64 4 -; AUTO_VEC-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], <4 x double>* [[TMP25]], align 8 -; AUTO_VEC-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[TMP22]], i64 8 -; AUTO_VEC-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_2]], <4 x double>* [[TMP27]], align 8 -; AUTO_VEC-NEXT: [[TMP28:%.*]] = getelementptr double, double* [[TMP22]], i64 12 -; AUTO_VEC-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_2]], <4 x double>* [[TMP29]], align 8 -; AUTO_VEC-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD5_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD6_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[TMP30:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_NEXT_2]] -; AUTO_VEC-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_NEXT_2]], <4 x double>* [[TMP31]], align 8 -; AUTO_VEC-NEXT: [[TMP32:%.*]] = getelementptr double, double* [[TMP30]], i64 4 -; AUTO_VEC-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], <4 x double>* [[TMP33]], align 8 -; AUTO_VEC-NEXT: [[TMP34:%.*]] = getelementptr double, double* [[TMP30]], i64 8 -; AUTO_VEC-NEXT: [[TMP35:%.*]] = bitcast double* [[TMP34]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_3]], <4 x double>* [[TMP35]], align 8 -; AUTO_VEC-NEXT: [[TMP36:%.*]] = getelementptr double, double* [[TMP30]], i64 12 -; AUTO_VEC-NEXT: [[TMP37:%.*]] = bitcast double* [[TMP36]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_3]], <4 x double>* [[TMP37]], align 8 -; AUTO_VEC-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 64 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_3]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4 -; AUTO_VEC-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0 -; AUTO_VEC-NEXT: br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop !7 -; AUTO_VEC: middle.block.unr-lcssa: -; AUTO_VEC-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[VEC_IND_UNR:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_3]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; AUTO_VEC-NEXT: br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]] -; AUTO_VEC: vector.body.epil: -; AUTO_VEC-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] -; AUTO_VEC-NEXT: [[VEC_IND_EPIL:%.*]] = phi <4 x double> [ [[VEC_IND_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[VEC_IND_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] -; AUTO_VEC-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] -; AUTO_VEC-NEXT: [[STEP_ADD_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[STEP_ADD5_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[STEP_ADD6_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[TMP38:%.*]] = getelementptr double, double* [[A]], i64 [[INDEX_EPIL]] -; AUTO_VEC-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND_EPIL]], <4 x double>* [[TMP39]], align 8 -; AUTO_VEC-NEXT: [[TMP40:%.*]] = getelementptr double, double* [[TMP38]], i64 4 -; AUTO_VEC-NEXT: [[TMP41:%.*]] = bitcast double* [[TMP40]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_EPIL]], <4 x double>* [[TMP41]], align 8 -; AUTO_VEC-NEXT: [[TMP42:%.*]] = getelementptr double, double* [[TMP38]], i64 8 -; AUTO_VEC-NEXT: [[TMP43:%.*]] = bitcast double* [[TMP42]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD5_EPIL]], <4 x double>* [[TMP43]], align 8 -; AUTO_VEC-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[TMP38]], i64 12 -; AUTO_VEC-NEXT: [[TMP45:%.*]] = bitcast double* [[TMP44]] to <4 x double>* -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6_EPIL]], <4 x double>* [[TMP45]], align 8 -; AUTO_VEC-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 16 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT_EPIL]] = fadd fast <4 x double> [[VEC_IND_EPIL]], -; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 -; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop !8 +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD6]], <4 x double>* [[TMP9]], align 8 +; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 ; AUTO_VEC: middle.block: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] -; AUTO_VEC-NEXT: [[TMP46:%.*]] = add nsw i64 [[N_VEC]], -1 -; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP46]] to double -; AUTO_VEC-NEXT: [[TMP47:%.*]] = fmul fast double [[CAST_CMO]], 3.000000e+00 +; AUTO_VEC-NEXT: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -1 +; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP11]] to double +; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul fast double [[CAST_CMO]], 3.000000e+00 ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] @@ -425,10 +245,10 @@ ; AUTO_VEC-NEXT: store double [[J]], double* [[TMP0]], align 8 ; AUTO_VEC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; AUTO_VEC-NEXT: [[J_NEXT]] = fadd fast double [[J]], 3.000000e+00 -; AUTO_VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]] -; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9 +; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] +; AUTO_VEC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !7 ; AUTO_VEC: for.end: -; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP47]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: ret double [[J_LCSSA]] ; entry: @@ -516,7 +336,7 @@ ; AUTO_VEC-NEXT: [[J_NEXT_EPIL]] = fadd double [[J_EPIL]], 3.000000e+00 ; AUTO_VEC-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 ; AUTO_VEC-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !10 +; AUTO_VEC-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop !8 ; AUTO_VEC: for.end: ; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[J_EPIL]], [[FOR_BODY_EPIL]] ] ; AUTO_VEC-NEXT: ret double [[J_LCSSA]]