diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -796,6 +796,20 @@ ToRemove.insert(ElementChain.begin(), ElementChain.end()); } } + + // If we converted the LoopStart to a t2DoLoopStartTP, we can also remove any + // extra instructions in the preheader, which often includes a now unused MOV. + if (Start->getOpcode() == ARM::t2DoLoopStartTP && Preheader && + !Preheader->empty() && + !RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) { + if (auto *Def = RDA.getUniqueReachingMIDef( + &Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) { + SmallPtrSet<MachineInstr *, 2> Ignore; + Ignore.insert(VCTPs.begin(), VCTPs.end()); + TryRemove(Def, RDA, ToRemove, Ignore); + } + } + return true; } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -10,7 +10,6 @@ ; CHECK-NEXT: ldrd r12, r2, [r0] ; CHECK-NEXT: ldrd r4, r3, [r0, #8] ; CHECK-NEXT: rsb r12, r12, r2, lsl #1 -; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: dlstp.16 lr, r12 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -7,7 +7,6 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body.i @@ -19,7 +18,6 @@ ; CHECK-NEXT: vmov s4, r1 ; CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 -; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: 
vdiv.f32 s0, s0, s4 ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -78,7 +78,6 @@ ; CHECK-NEXT: @ %bb.5: @ %do.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: bic r9, r7, #3 -; CHECK-NEXT: mov r7, r5 ; CHECK-NEXT: mov r4, r3 ; CHECK-NEXT: add.w r8, r0, r9, lsl #2 ; CHECK-NEXT: dlstp.32 lr, r5 diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -447,7 +447,6 @@ ; CHECK-NEXT: movw r3, :lower16:b ; CHECK-NEXT: movt r3, :upper16:b ; CHECK-NEXT: str r1, [r3] -; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: dlstp.32 lr, r6 ; CHECK-NEXT: .LBB1_10: @ %vector.body111 ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 @@ -462,7 +461,6 @@ ; CHECK-NEXT: .LBB1_11: @ %vector.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2 ; CHECK-NEXT: vmov q1, q4 -; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: dlstp.32 lr, r6 ; CHECK-NEXT: .LBB1_12: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll --- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll @@ -24,7 +24,6 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: mov r4, r5 ; CHECK-NEXT: dlstp.32 lr, r5 ; CHECK-NEXT: .LBB0_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 @@ -127,7 +126,6 @@ ; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB1_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop 
BB1_2 Depth=1 @@ -272,7 +270,6 @@ ; CHECK-NEXT: mov r4, r10 ; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: mov r8, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB2_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB2_2 Depth=1 @@ -448,7 +445,6 @@ ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov q3, q0 -; CHECK-NEXT: mov r10, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB3_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB3_2 Depth=1 @@ -645,7 +641,6 @@ ; CHECK-NEXT: vmov q3, q1 ; CHECK-NEXT: vmov q2, q1 ; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB4_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB4_2 Depth=1 @@ -864,7 +859,6 @@ ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov q2, q1 -; CHECK-NEXT: mov r9, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB5_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB5_2 Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -739,7 +739,6 @@ ; CHECK-NEXT: mla r3, r9, r2, r0 ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB5_7: @ %for.body24 @@ -913,7 +912,6 @@ ; CHECK-NEXT: mla r3, r9, r2, r0 ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB6_5: @ %for.body24 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -14,7 +14,6 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vldrw.u32 q1, 
[r4] ; CHECK-NEXT: vmov.i32 q3, #0x4 -; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1