diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1440,6 +1440,17 @@ assert(LatchBlock && "PostInc mode requires a unique loop latch!"); Result = PN->getIncomingValueForBlock(LatchBlock); + // We might be introducing a new use of the post-inc IV that is not poison + // safe, in which case we should drop poison generating flags. Only keep + // those flags for which SCEV has proven that they always hold. + if (isa<OverflowingBinaryOperator>(Result)) { + auto *I = cast<Instruction>(Result); + if (!S->hasNoUnsignedWrap()) + I->setHasNoUnsignedWrap(false); + if (!S->hasNoSignedWrap()) + I->setHasNoSignedWrap(false); + } + // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -27,11 +27,11 @@ ; CHECK-NEXT: beq .LBB0_4 ; CHECK-NEXT: @ %bb.2: @ %for.body.preheader ; CHECK-NEXT: subs r5, r3, #1 -; CHECK-NEXT: and r7, r3, #3 +; CHECK-NEXT: and lr, r3, #3 ; CHECK-NEXT: cmp r5, #3 ; CHECK-NEXT: bhs .LBB0_6 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: b .LBB0_8 ; CHECK-NEXT: .LBB0_4: @ %vector.ph ; CHECK-NEXT: mov.w r12, #0 @@ -46,44 +46,40 @@ ; CHECK-NEXT: letp lr, .LBB0_5 ; CHECK-NEXT: b .LBB0_11 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r5, r3, lsr #2 +; CHECK-NEXT: sub.w r12, r3, lr +; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: 
dls lr, lr ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, r1, r3 -; CHECK-NEXT: adds r5, r2, r3 -; CHECK-NEXT: adds r6, r0, r3 -; CHECK-NEXT: adds r3, #16 -; CHECK-NEXT: vldr s0, [r4] -; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: vldr s2, [r5] +; CHECK-NEXT: adds r5, r1, r4 +; CHECK-NEXT: adds r6, r2, r4 +; CHECK-NEXT: adds r7, r0, r4 +; CHECK-NEXT: adds r3, #4 +; CHECK-NEXT: vldr s0, [r5] +; CHECK-NEXT: adds r4, #16 +; CHECK-NEXT: vldr s2, [r6] +; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r6] -; CHECK-NEXT: vldr s0, [r4, #4] -; CHECK-NEXT: vldr s2, [r5, #4] +; CHECK-NEXT: vstr s0, [r7] +; CHECK-NEXT: vldr s0, [r5, #4] +; CHECK-NEXT: vldr s2, [r6, #4] ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r6, #4] -; CHECK-NEXT: vldr s0, [r4, #8] -; CHECK-NEXT: vldr s2, [r5, #8] +; CHECK-NEXT: vstr s0, [r7, #4] +; CHECK-NEXT: vldr s0, [r5, #8] +; CHECK-NEXT: vldr s2, [r6, #8] ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r6, #8] -; CHECK-NEXT: vldr s0, [r4, #12] -; CHECK-NEXT: vldr s2, [r5, #12] +; CHECK-NEXT: vstr s0, [r7, #8] +; CHECK-NEXT: vldr s0, [r5, #12] +; CHECK-NEXT: vldr s2, [r6, #12] ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vstr s0, [r6, #12] -; CHECK-NEXT: le lr, .LBB0_7 +; CHECK-NEXT: vstr s0, [r7, #12] +; CHECK-NEXT: bne .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r7, .LBB0_11 +; CHECK-NEXT: wls lr, lr, .LBB0_11 ; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader -; CHECK-NEXT: add.w r1, r1, r12, lsl #2 -; CHECK-NEXT: add.w r2, r2, r12, lsl #2 -; CHECK-NEXT: add.w r0, r0, r12, lsl #2 -; CHECK-NEXT: mov lr, r7 +; CHECK-NEXT: add.w r1, r1, r3, lsl #2 +; CHECK-NEXT: add.w r2, r2, r3, lsl #2 +; CHECK-NEXT: add.w r0, r0, r3, lsl #2 ; CHECK-NEXT: .LBB0_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr s0, [r1] diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -1459,58 +1459,53 @@ ; CHECK-NEXT: cbz r2, .LBB9_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: subs r3, r2, #1 -; CHECK-NEXT: and r5, r2, #3 +; CHECK-NEXT: and lr, r2, #3 +; CHECK-NEXT: vldr s0, .LCPI9_0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhs .LBB9_4 ; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: vldr s0, .LCPI9_0 -; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: b .LBB9_6 ; CHECK-NEXT: .LBB9_3: ; CHECK-NEXT: vldr s0, .LCPI9_0 ; CHECK-NEXT: b .LBB9_9 ; CHECK-NEXT: .LBB9_4: @ %for.body.preheader.new -; CHECK-NEXT: bic r2, r2, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vldr s0, .LCPI9_0 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r3, r2, lsr #2 +; CHECK-NEXT: sub.w r12, r2, lr ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: .LBB9_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, r0, r3 -; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: vldr.16 s2, [r2, #6] -; CHECK-NEXT: vldr.16 s4, [r4, #6] -; CHECK-NEXT: vldr.16 s6, [r4, #4] -; CHECK-NEXT: vldr.16 s8, [r4, #2] +; CHECK-NEXT: adds r5, r0, r3 +; CHECK-NEXT: adds r4, r1, r3 +; CHECK-NEXT: vldr.16 s2, [r4, #6] +; CHECK-NEXT: vldr.16 s4, [r5, #6] +; CHECK-NEXT: vldr.16 s6, [r5, #4] +; CHECK-NEXT: vldr.16 s8, [r5, #2] ; CHECK-NEXT: vmul.f16 s2, s4, s2 -; CHECK-NEXT: vldr.16 s4, [r2, #4] -; CHECK-NEXT: vldr.16 s10, [r4] +; CHECK-NEXT: vldr.16 s4, [r4, #4] +; CHECK-NEXT: vldr.16 s10, [r5] ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vmul.f16 s4, s6, s4 -; CHECK-NEXT: vldr.16 s6, [r2, #2] +; CHECK-NEXT: vldr.16 s6, [r4, #2] ; CHECK-NEXT: vcvtb.f32.f16 s4, s4 -; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: adds r2, #4 ; 
CHECK-NEXT: vmul.f16 s6, s8, s6 -; CHECK-NEXT: vldr.16 s8, [r2] +; CHECK-NEXT: vldr.16 s8, [r4] ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vmul.f16 s8, s10, s8 +; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NEXT: le lr, .LBB9_5 +; CHECK-NEXT: bne .LBB9_5 ; CHECK-NEXT: .LBB9_6: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r5, .LBB9_9 +; CHECK-NEXT: wls lr, lr, .LBB9_9 ; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader -; CHECK-NEXT: add.w r0, r0, r12, lsl #1 -; CHECK-NEXT: add.w r1, r1, r12, lsl #1 -; CHECK-NEXT: mov lr, r5 +; CHECK-NEXT: add.w r0, r0, r2, lsl #1 +; CHECK-NEXT: add.w r1, r1, r2, lsl #1 ; CHECK-NEXT: .LBB9_8: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr.16 s2, [r1] @@ -1616,58 +1611,53 @@ ; CHECK-NEXT: cbz r2, .LBB10_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: subs r3, r2, #1 -; CHECK-NEXT: and r5, r2, #3 +; CHECK-NEXT: and lr, r2, #3 +; CHECK-NEXT: vldr s0, .LCPI10_0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhs .LBB10_4 ; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: vldr s0, .LCPI10_0 -; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: b .LBB10_6 ; CHECK-NEXT: .LBB10_3: ; CHECK-NEXT: vldr s0, .LCPI10_0 ; CHECK-NEXT: b .LBB10_9 ; CHECK-NEXT: .LBB10_4: @ %for.body.preheader.new -; CHECK-NEXT: bic r2, r2, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vldr s0, .LCPI10_0 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r3, r2, lsr #2 +; CHECK-NEXT: sub.w r12, r2, lr ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: .LBB10_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, r0, r3 -; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: vldr.16 s2, [r2, 
#6] -; CHECK-NEXT: vldr.16 s4, [r4, #6] -; CHECK-NEXT: vldr.16 s6, [r4, #4] -; CHECK-NEXT: vldr.16 s8, [r4, #2] +; CHECK-NEXT: adds r5, r0, r3 +; CHECK-NEXT: adds r4, r1, r3 +; CHECK-NEXT: vldr.16 s2, [r4, #6] +; CHECK-NEXT: vldr.16 s4, [r5, #6] +; CHECK-NEXT: vldr.16 s6, [r5, #4] +; CHECK-NEXT: vldr.16 s8, [r5, #2] ; CHECK-NEXT: vadd.f16 s2, s4, s2 -; CHECK-NEXT: vldr.16 s4, [r2, #4] -; CHECK-NEXT: vldr.16 s10, [r4] +; CHECK-NEXT: vldr.16 s4, [r4, #4] +; CHECK-NEXT: vldr.16 s10, [r5] ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vadd.f16 s4, s6, s4 -; CHECK-NEXT: vldr.16 s6, [r2, #2] +; CHECK-NEXT: vldr.16 s6, [r4, #2] ; CHECK-NEXT: vcvtb.f32.f16 s4, s4 -; CHECK-NEXT: adds r3, #8 +; CHECK-NEXT: adds r2, #4 ; CHECK-NEXT: vadd.f16 s6, s8, s6 -; CHECK-NEXT: vldr.16 s8, [r2] +; CHECK-NEXT: vldr.16 s8, [r4] ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vadd.f16 s8, s10, s8 +; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NEXT: le lr, .LBB10_5 +; CHECK-NEXT: bne .LBB10_5 ; CHECK-NEXT: .LBB10_6: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r5, .LBB10_9 +; CHECK-NEXT: wls lr, lr, .LBB10_9 ; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader -; CHECK-NEXT: add.w r0, r0, r12, lsl #1 -; CHECK-NEXT: add.w r1, r1, r12, lsl #1 -; CHECK-NEXT: mov lr, r5 +; CHECK-NEXT: add.w r0, r0, r2, lsl #1 +; CHECK-NEXT: add.w r1, r1, r2, lsl #1 ; CHECK-NEXT: .LBB10_8: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldr.16 s2, [r1] @@ -1773,65 +1763,60 @@ ; CHECK-NEXT: cbz r2, .LBB11_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: subs r3, r2, #1 -; CHECK-NEXT: and r6, r2, #3 +; CHECK-NEXT: and lr, r2, #3 +; CHECK-NEXT: vldr s0, .LCPI11_0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhs .LBB11_4 ; CHECK-NEXT: @ %bb.2: 
-; CHECK-NEXT: vldr s0, .LCPI11_0 -; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: b .LBB11_6 ; CHECK-NEXT: .LBB11_3: ; CHECK-NEXT: vldr s0, .LCPI11_0 ; CHECK-NEXT: b .LBB11_9 ; CHECK-NEXT: .LBB11_4: @ %for.body.preheader.new -; CHECK-NEXT: bic r2, r2, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vldr s0, .LCPI11_0 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r3, r2, lsr #2 +; CHECK-NEXT: sub.w r12, r2, lr ; CHECK-NEXT: adds r3, r1, #4 -; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: adds r2, r0, #4 +; CHECK-NEXT: adds r4, r0, #4 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: .LBB11_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh.w r4, [r3, #2] -; CHECK-NEXT: vldr.16 s2, [r2, #2] -; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: vmov s4, r4 -; CHECK-NEXT: ldrsh r4, [r3], #8 +; CHECK-NEXT: ldrsh.w r5, [r3, #2] +; CHECK-NEXT: vldr.16 s2, [r4, #2] +; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: cmp r12, r2 +; CHECK-NEXT: vmov s4, r5 +; CHECK-NEXT: ldrsh r5, [r3], #8 ; CHECK-NEXT: vcvt.f16.s32 s4, s4 -; CHECK-NEXT: ldrsh r5, [r3, #-10] +; CHECK-NEXT: ldrsh r6, [r3, #-10] ; CHECK-NEXT: vmul.f16 s2, s2, s4 -; CHECK-NEXT: vmov s6, r4 -; CHECK-NEXT: vldr.16 s4, [r2] +; CHECK-NEXT: vmov s6, r5 +; CHECK-NEXT: vldr.16 s4, [r4] ; CHECK-NEXT: vcvt.f16.s32 s6, s6 -; CHECK-NEXT: ldrsh r4, [r3, #-12] +; CHECK-NEXT: ldrsh r5, [r3, #-12] ; CHECK-NEXT: vmul.f16 s4, s4, s6 -; CHECK-NEXT: vmov s8, r5 -; CHECK-NEXT: vldr.16 s6, [r2, #-2] +; CHECK-NEXT: vmov s8, r6 +; CHECK-NEXT: vldr.16 s6, [r4, #-2] ; CHECK-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-NEXT: vmov s10, r4 +; CHECK-NEXT: vmov s10, r5 ; CHECK-NEXT: vcvtb.f32.f16 s4, s4 ; CHECK-NEXT: vmul.f16 s6, s6, s8 -; CHECK-NEXT: vldr.16 s8, [r2, #-4] +; CHECK-NEXT: vldr.16 s8, [r4, #-4] ; CHECK-NEXT: vcvt.f16.s32 s10, s10 ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 ; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 -; 
CHECK-NEXT: adds r2, #8 +; CHECK-NEXT: add.w r4, r4, #8 ; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NEXT: le lr, .LBB11_5 +; CHECK-NEXT: bne .LBB11_5 ; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r6, .LBB11_9 +; CHECK-NEXT: wls lr, lr, .LBB11_9 ; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader -; CHECK-NEXT: add.w r0, r0, r12, lsl #1 -; CHECK-NEXT: add.w r1, r1, r12, lsl #1 -; CHECK-NEXT: mov lr, r6 +; CHECK-NEXT: add.w r0, r0, r2, lsl #1 +; CHECK-NEXT: add.w r1, r1, r2, lsl #1 ; CHECK-NEXT: .LBB11_8: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r2, [r1], #2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -387,37 +387,37 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq.w .LBB5_11 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph -; CHECK-NEXT: add.w r4, r3, r12, lsl #2 -; CHECK-NEXT: add.w r5, r1, r12 -; CHECK-NEXT: cmp r4, r1 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: cset lr, hi -; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: cset r5, hi -; CHECK-NEXT: cmp r4, r0 -; CHECK-NEXT: cset r4, hi +; CHECK-NEXT: add.w r5, r3, r12, lsl #2 +; CHECK-NEXT: add.w r6, r1, r12 +; CHECK-NEXT: cmp r5, r1 +; CHECK-NEXT: add.w r4, r0, r12 +; CHECK-NEXT: cset r7, hi ; CHECK-NEXT: cmp r6, r3 ; CHECK-NEXT: cset r6, hi -; CHECK-NEXT: ands r4, r6 -; CHECK-NEXT: lsls r4, r4, #31 +; CHECK-NEXT: cmp r5, r0 +; CHECK-NEXT: cset r5, hi +; CHECK-NEXT: cmp r4, r3 +; CHECK-NEXT: cset r4, hi +; CHECK-NEXT: ands r5, r4 +; CHECK-NEXT: lsls r5, r5, #31 ; CHECK-NEXT: itt eq -; CHECK-NEXT: andeq.w r6, r5, lr -; CHECK-NEXT: lslseq.w r6, r6, #31 +; CHECK-NEXT: andeq r7, r6 +; CHECK-NEXT: lslseq.w 
r7, r7, #31 ; CHECK-NEXT: beq .LBB5_4 ; CHECK-NEXT: @ %bb.2: @ %for.body.preheader -; CHECK-NEXT: sub.w r6, r12, #1 -; CHECK-NEXT: and r9, r12, #3 -; CHECK-NEXT: cmp r6, #3 +; CHECK-NEXT: sub.w r4, r12, #1 +; CHECK-NEXT: and lr, r12, #3 +; CHECK-NEXT: cmp r4, #3 ; CHECK-NEXT: bhs .LBB5_6 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB5_8 ; CHECK-NEXT: .LBB5_4: @ %vector.ph -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB5_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -425,49 +425,45 @@ ; CHECK-NEXT: letp lr, .LBB5_5 ; CHECK-NEXT: b .LBB5_11 ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new -; CHECK-NEXT: bic r6, r12, #3 -; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: subs r6, #4 -; CHECK-NEXT: add.w r4, r3, #8 +; CHECK-NEXT: sub.w r8, r12, lr +; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: adds r6, r0, #3 +; CHECK-NEXT: adds r7, r1, #1 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r5, r6, lsr #2 -; CHECK-NEXT: adds r5, r0, #3 -; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: adds r6, r1, #1 ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r8, [r5, #-3] +; CHECK-NEXT: ldrb r9, [r6, #-3] ; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: ldrb r7, [r6, #-1] -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #-8] -; CHECK-NEXT: ldrb r8, [r5, #-2] -; CHECK-NEXT: ldrb r7, [r6], #4 -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #-4] -; CHECK-NEXT: ldrb r8, [r5, #-1] -; CHECK-NEXT: ldrb r7, [r6, #-3] -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4] -; CHECK-NEXT: ldrb r8, [r5], #4 -; CHECK-NEXT: ldrb r7, [r6, #-2] -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #4] -; CHECK-NEXT: adds r4, #16 -; 
CHECK-NEXT: le lr, .LBB5_7 +; CHECK-NEXT: ldrb r4, [r7, #-1] +; CHECK-NEXT: cmp r8, r12 +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #-8] +; CHECK-NEXT: ldrb r9, [r6, #-2] +; CHECK-NEXT: ldrb r4, [r7], #4 +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #-4] +; CHECK-NEXT: ldrb r9, [r6, #-1] +; CHECK-NEXT: ldrb r4, [r7, #-3] +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5] +; CHECK-NEXT: ldrb r9, [r6], #4 +; CHECK-NEXT: ldrb r4, [r7, #-2] +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #4] +; CHECK-NEXT: add.w r5, r5, #16 +; CHECK-NEXT: bne .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r9, .LBB5_11 +; CHECK-NEXT: wls lr, lr, .LBB5_11 ; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader ; CHECK-NEXT: add r0, r12 ; CHECK-NEXT: add r1, r12 ; CHECK-NEXT: add.w r3, r3, r12, lsl #2 -; CHECK-NEXT: mov lr, r9 ; CHECK-NEXT: .LBB5_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r6, [r0], #1 -; CHECK-NEXT: ldrb r5, [r1], #1 -; CHECK-NEXT: smlabb r6, r5, r6, r2 -; CHECK-NEXT: str r6, [r3], #4 +; CHECK-NEXT: ldrb r7, [r0], #1 +; CHECK-NEXT: ldrb r6, [r1], #1 +; CHECK-NEXT: smlabb r7, r6, r7, r2 +; CHECK-NEXT: str r7, [r3], #4 ; CHECK-NEXT: le lr, .LBB5_10 ; CHECK-NEXT: .LBB5_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} @@ -689,37 +685,37 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq.w .LBB7_11 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph -; CHECK-NEXT: add.w r4, r3, r12, lsl #2 -; CHECK-NEXT: add.w r5, r1, r12 -; CHECK-NEXT: cmp r4, r1 -; CHECK-NEXT: add.w r6, r0, r12 -; CHECK-NEXT: cset lr, hi -; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: cset r5, hi -; CHECK-NEXT: cmp r4, r0 -; CHECK-NEXT: cset r4, hi +; CHECK-NEXT: add.w r5, r3, r12, lsl #2 +; CHECK-NEXT: add.w r6, r1, r12 +; CHECK-NEXT: cmp r5, r1 +; CHECK-NEXT: add.w r4, r0, r12 +; CHECK-NEXT: cset r7, hi ; CHECK-NEXT: cmp r6, r3 ; CHECK-NEXT: cset 
r6, hi -; CHECK-NEXT: ands r4, r6 -; CHECK-NEXT: lsls r4, r4, #31 +; CHECK-NEXT: cmp r5, r0 +; CHECK-NEXT: cset r5, hi +; CHECK-NEXT: cmp r4, r3 +; CHECK-NEXT: cset r4, hi +; CHECK-NEXT: ands r5, r4 +; CHECK-NEXT: lsls r5, r5, #31 ; CHECK-NEXT: itt eq -; CHECK-NEXT: andeq.w r6, r5, lr -; CHECK-NEXT: lslseq.w r6, r6, #31 +; CHECK-NEXT: andeq r7, r6 +; CHECK-NEXT: lslseq.w r7, r7, #31 ; CHECK-NEXT: beq .LBB7_4 ; CHECK-NEXT: @ %bb.2: @ %for.body.preheader -; CHECK-NEXT: sub.w r6, r12, #1 -; CHECK-NEXT: and r9, r12, #3 -; CHECK-NEXT: cmp r6, #3 +; CHECK-NEXT: sub.w r4, r12, #1 +; CHECK-NEXT: and lr, r12, #3 +; CHECK-NEXT: cmp r4, #3 ; CHECK-NEXT: bhs .LBB7_6 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB7_8 ; CHECK-NEXT: .LBB7_4: @ %vector.ph -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB7_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -727,49 +723,45 @@ ; CHECK-NEXT: letp lr, .LBB7_5 ; CHECK-NEXT: b .LBB7_11 ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new -; CHECK-NEXT: bic r6, r12, #3 -; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: subs r6, #4 -; CHECK-NEXT: add.w r4, r3, #8 +; CHECK-NEXT: sub.w r8, r12, lr +; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: adds r6, r0, #3 +; CHECK-NEXT: adds r7, r1, #1 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r5, r6, lsr #2 -; CHECK-NEXT: adds r5, r0, #3 -; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: adds r6, r1, #1 ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r8, [r5, #-3] +; CHECK-NEXT: ldrb r9, [r6, #-3] ; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: ldrb r7, [r6, #-1] -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #-8] -; CHECK-NEXT: ldrb r8, [r5, #-2] -; CHECK-NEXT: ldrb r7, [r6], #4 -; 
CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #-4] -; CHECK-NEXT: ldrb r8, [r5, #-1] -; CHECK-NEXT: ldrb r7, [r6, #-3] -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4] -; CHECK-NEXT: ldrb r8, [r5], #4 -; CHECK-NEXT: ldrb r7, [r6, #-2] -; CHECK-NEXT: smlabb r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #4] -; CHECK-NEXT: adds r4, #16 -; CHECK-NEXT: le lr, .LBB7_7 +; CHECK-NEXT: ldrb r4, [r7, #-1] +; CHECK-NEXT: cmp r8, r12 +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #-8] +; CHECK-NEXT: ldrb r9, [r6, #-2] +; CHECK-NEXT: ldrb r4, [r7], #4 +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #-4] +; CHECK-NEXT: ldrb r9, [r6, #-1] +; CHECK-NEXT: ldrb r4, [r7, #-3] +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5] +; CHECK-NEXT: ldrb r9, [r6], #4 +; CHECK-NEXT: ldrb r4, [r7, #-2] +; CHECK-NEXT: smlabb r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #4] +; CHECK-NEXT: add.w r5, r5, #16 +; CHECK-NEXT: bne .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r9, .LBB7_11 +; CHECK-NEXT: wls lr, lr, .LBB7_11 ; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader ; CHECK-NEXT: add r0, r12 ; CHECK-NEXT: add r1, r12 ; CHECK-NEXT: add.w r3, r3, r12, lsl #2 -; CHECK-NEXT: mov lr, r9 ; CHECK-NEXT: .LBB7_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r6, [r0], #1 -; CHECK-NEXT: ldrb r5, [r1], #1 -; CHECK-NEXT: smlabb r6, r5, r6, r2 -; CHECK-NEXT: str r6, [r3], #4 +; CHECK-NEXT: ldrb r7, [r0], #1 +; CHECK-NEXT: ldrb r6, [r1], #1 +; CHECK-NEXT: smlabb r7, r6, r7, r2 +; CHECK-NEXT: str r7, [r3], #4 ; CHECK-NEXT: le lr, .LBB7_10 ; CHECK-NEXT: .LBB7_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} @@ -991,37 +983,37 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq.w .LBB9_11 ; CHECK-NEXT: @ %bb.1: @ %vector.memcheck -; CHECK-NEXT: add.w r4, r3, r12, lsl #2 -; CHECK-NEXT: add.w r5, r1, r12, lsl #2 -; CHECK-NEXT: cmp 
r4, r1 -; CHECK-NEXT: add.w r6, r0, r12, lsl #2 -; CHECK-NEXT: cset lr, hi -; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: cset r5, hi -; CHECK-NEXT: cmp r4, r0 -; CHECK-NEXT: cset r4, hi +; CHECK-NEXT: add.w r5, r3, r12, lsl #2 +; CHECK-NEXT: add.w r6, r1, r12, lsl #2 +; CHECK-NEXT: cmp r5, r1 +; CHECK-NEXT: add.w r4, r0, r12, lsl #2 +; CHECK-NEXT: cset r7, hi ; CHECK-NEXT: cmp r6, r3 ; CHECK-NEXT: cset r6, hi -; CHECK-NEXT: ands r4, r6 -; CHECK-NEXT: lsls r4, r4, #31 +; CHECK-NEXT: cmp r5, r0 +; CHECK-NEXT: cset r5, hi +; CHECK-NEXT: cmp r4, r3 +; CHECK-NEXT: cset r4, hi +; CHECK-NEXT: ands r5, r4 +; CHECK-NEXT: lsls r5, r5, #31 ; CHECK-NEXT: itt eq -; CHECK-NEXT: andeq.w r6, r5, lr -; CHECK-NEXT: lslseq.w r6, r6, #31 +; CHECK-NEXT: andeq r7, r6 +; CHECK-NEXT: lslseq.w r7, r7, #31 ; CHECK-NEXT: beq .LBB9_4 ; CHECK-NEXT: @ %bb.2: @ %for.body.preheader -; CHECK-NEXT: sub.w r6, r12, #1 -; CHECK-NEXT: and r9, r12, #3 -; CHECK-NEXT: cmp r6, #3 +; CHECK-NEXT: sub.w r4, r12, #1 +; CHECK-NEXT: and lr, r12, #3 +; CHECK-NEXT: cmp r4, #3 ; CHECK-NEXT: bhs .LBB9_6 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB9_8 ; CHECK-NEXT: .LBB9_4: @ %vector.ph -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -1029,51 +1021,47 @@ ; CHECK-NEXT: letp lr, .LBB9_5 ; CHECK-NEXT: b .LBB9_11 ; CHECK-NEXT: .LBB9_6: @ %for.body.preheader.new -; CHECK-NEXT: bic r6, r12, #3 -; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: subs r6, #4 -; CHECK-NEXT: add.w r4, r3, #8 +; CHECK-NEXT: sub.w r8, r12, lr +; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: add.w r6, r0, #8 +; CHECK-NEXT: add.w r7, r1, #8 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: add.w lr, r5, r6, lsr #2 -; CHECK-NEXT: add.w r5, r0, #8 -; 
CHECK-NEXT: dls lr, lr -; CHECK-NEXT: add.w r6, r1, #8 ; CHECK-NEXT: .LBB9_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r8, [r5, #-8] +; CHECK-NEXT: ldr r9, [r6, #-8] ; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: ldr r7, [r6, #-8] -; CHECK-NEXT: mla r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #-8] -; CHECK-NEXT: ldr r8, [r5, #-4] -; CHECK-NEXT: ldr r7, [r6, #-4] -; CHECK-NEXT: mla r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #-4] -; CHECK-NEXT: ldr.w r8, [r5] -; CHECK-NEXT: ldr r7, [r6] -; CHECK-NEXT: mla r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4] -; CHECK-NEXT: ldr.w r8, [r5, #4] -; CHECK-NEXT: adds r5, #16 -; CHECK-NEXT: ldr r7, [r6, #4] -; CHECK-NEXT: adds r6, #16 -; CHECK-NEXT: mla r7, r7, r8, r2 -; CHECK-NEXT: str r7, [r4, #4] -; CHECK-NEXT: adds r4, #16 -; CHECK-NEXT: le lr, .LBB9_7 +; CHECK-NEXT: ldr r4, [r7, #-8] +; CHECK-NEXT: cmp r8, r12 +; CHECK-NEXT: mla r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #-8] +; CHECK-NEXT: ldr r9, [r6, #-4] +; CHECK-NEXT: ldr r4, [r7, #-4] +; CHECK-NEXT: mla r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #-4] +; CHECK-NEXT: ldr.w r9, [r6] +; CHECK-NEXT: ldr r4, [r7] +; CHECK-NEXT: mla r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5] +; CHECK-NEXT: ldr.w r9, [r6, #4] +; CHECK-NEXT: add.w r6, r6, #16 +; CHECK-NEXT: ldr r4, [r7, #4] +; CHECK-NEXT: add.w r7, r7, #16 +; CHECK-NEXT: mla r4, r4, r9, r2 +; CHECK-NEXT: str r4, [r5, #4] +; CHECK-NEXT: add.w r5, r5, #16 +; CHECK-NEXT: bne .LBB9_7 ; CHECK-NEXT: .LBB9_8: @ %for.cond.cleanup.loopexit.unr-lcssa -; CHECK-NEXT: wls lr, r9, .LBB9_11 +; CHECK-NEXT: wls lr, lr, .LBB9_11 ; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 ; CHECK-NEXT: add.w r1, r1, r12, lsl #2 ; CHECK-NEXT: add.w r3, r3, r12, lsl #2 -; CHECK-NEXT: mov lr, r9 ; CHECK-NEXT: .LBB9_10: @ %for.body.epil ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r6, [r0], #4 -; CHECK-NEXT: ldr r5, [r1], #4 -; CHECK-NEXT: mla r6, r5, r6, r2 -; 
CHECK-NEXT: str r6, [r3], #4 +; CHECK-NEXT: ldr r7, [r0], #4 +; CHECK-NEXT: ldr r6, [r1], #4 +; CHECK-NEXT: mla r7, r6, r7, r2 +; CHECK-NEXT: str r7, [r3], #4 ; CHECK-NEXT: le lr, .LBB9_10 ; CHECK-NEXT: .LBB9_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll @@ -7,6 +7,7 @@ declare void @use(i8 zeroext) declare void @use_p(i8*) +; nuw needs to be dropped when switching to post-inc comparison. define i8 @drop_nuw() { ; CHECK-LABEL: @drop_nuw( ; CHECK-NEXT: entry: @@ -14,7 +15,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: call void @use(i8 [[IV]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: @@ -36,6 +37,7 @@ ret i8 %iv } +; nsw needs to be dropped when switching to post-inc comparison. define i8 @drop_nsw() { ; CHECK-LABEL: @drop_nsw( ; CHECK-NEXT: entry: @@ -43,7 +45,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 127, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: call void @use(i8 [[IV]]) -; CHECK-NEXT: [[IV_NEXT]] = add nsw i8 [[IV]], -1 +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], -1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 127 ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: @@ -65,6 +67,7 @@ ret i8 %iv } +; Comparison already in post-inc form, no need to drop nuw. 
define i8 @already_postinc() { ; CHECK-LABEL: @already_postinc( ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll --- a/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll @@ -17,7 +17,7 @@ ; CHECK: do.body: ; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ] ; CHECK-NEXT: tail call void @goo(i64 [[I_0]], i64 [[I_0]]) -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_0]], 1 +; CHECK-NEXT: [[INC]] = add nuw i64 [[I_0]], 1 ; CHECK-NEXT: [[T0:%.*]] = load i64, i64* @cond, align 8 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[T0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[DO_BODY2_PREHEADER:%.*]], label [[DO_BODY]] @@ -27,7 +27,7 @@ ; CHECK-NEXT: [[I_1:%.*]] = phi i64 [ [[INC3:%.*]], [[DO_BODY2]] ], [ 0, [[DO_BODY2_PREHEADER]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INC]], [[I_1]] ; CHECK-NEXT: tail call void @goo(i64 [[I_1]], i64 [[TMP0]]) -; CHECK-NEXT: [[INC3]] = add nuw nsw i64 [[I_1]], 1 +; CHECK-NEXT: [[INC3]] = add nuw i64 [[I_1]], 1 ; CHECK-NEXT: [[T1:%.*]] = load i64, i64* @cond, align 8 ; CHECK-NEXT: [[TOBOOL6:%.*]] = icmp eq i64 [[T1]], 0 ; CHECK-NEXT: br i1 [[TOBOOL6]], label [[DO_BODY8_PREHEADER:%.*]], label [[DO_BODY2]] @@ -39,7 +39,7 @@ ; CHECK-NEXT: [[J_2:%.*]] = phi i64 [ [[INC10:%.*]], [[DO_BODY8]] ], [ [[TMP1]], [[DO_BODY8_PREHEADER]] ] ; CHECK-NEXT: tail call void @goo(i64 [[I_2]], i64 [[J_2]]) ; CHECK-NEXT: [[INC9]] = add nuw nsw i64 [[I_2]], 1 -; CHECK-NEXT: [[INC10]] = add nsw i64 [[J_2]], 1 +; CHECK-NEXT: [[INC10]] = add i64 [[J_2]], 1 ; CHECK-NEXT: [[T2:%.*]] = load i64, i64* @cond, align 8 ; CHECK-NEXT: [[TOBOOL12:%.*]] = icmp eq i64 [[T2]], 0 ; CHECK-NEXT: br i1 [[TOBOOL12]], label [[DO_BODY14_PREHEADER:%.*]], label [[DO_BODY8]] @@ -50,7 +50,7 @@ ; CHECK-NEXT: [[J_3:%.*]] = phi i64 [ [[INC16:%.*]], [[DO_BODY14]] ], [ [[INC10]], 
[[DO_BODY14_PREHEADER]] ] ; CHECK-NEXT: tail call void @goo(i64 [[I_3]], i64 [[J_3]]) ; CHECK-NEXT: [[INC15]] = add nuw nsw i64 [[I_3]], 1 -; CHECK-NEXT: [[INC16]] = add nsw i64 [[J_3]], 1 +; CHECK-NEXT: [[INC16]] = add i64 [[J_3]], 1 ; CHECK-NEXT: [[T3:%.*]] = load i64, i64* @cond, align 8 ; CHECK-NEXT: [[TOBOOL18:%.*]] = icmp eq i64 [[T3]], 0 ; CHECK-NEXT: br i1 [[TOBOOL18]], label [[DO_BODY20_PREHEADER:%.*]], label [[DO_BODY14]] @@ -61,7 +61,7 @@ ; CHECK-NEXT: [[J_4:%.*]] = phi i64 [ [[INC22:%.*]], [[DO_BODY20]] ], [ [[INC16]], [[DO_BODY20_PREHEADER]] ] ; CHECK-NEXT: tail call void @goo(i64 [[I_4]], i64 [[J_4]]) ; CHECK-NEXT: [[INC21]] = add nuw nsw i64 [[I_4]], 1 -; CHECK-NEXT: [[INC22]] = add nsw i64 [[J_4]], 1 +; CHECK-NEXT: [[INC22]] = add i64 [[J_4]], 1 ; CHECK-NEXT: [[T4:%.*]] = load i64, i64* @cond, align 8 ; CHECK-NEXT: [[TOBOOL24:%.*]] = icmp eq i64 [[T4]], 0 ; CHECK-NEXT: br i1 [[TOBOOL24]], label [[DO_BODY26_PREHEADER:%.*]], label [[DO_BODY20]]