Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -789,6 +789,21 @@ ToRemove.insert(ElementChain.begin(), ElementChain.end()); } } + // If we converted the LoopStart to a t2DoLoopStartTP, we can also remove any + // extra instructions in the preheader, which often includes a now unused MOV. + if (Start->getOpcode() == ARM::t2DoLoopStartTP && Preheader && + !Preheader->empty() && + !RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) { + if (auto *Def = RDA.getUniqueReachingMIDef( + &Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) { + SmallPtrSet<MachineInstr *, 2> ElementChain; + SmallPtrSet<MachineInstr *, 2> Ignore; + Ignore.insert(VCTPs.begin(), VCTPs.end()); + if (TryRemove(Def, RDA, ElementChain, Ignore)) { + ToRemove.insert(ElementChain.begin(), ElementChain.end()); + } + } + } return true; } Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -10,7 +10,6 @@ ; CHECK-NEXT: ldrd r12, r2, [r0] ; CHECK-NEXT: ldrd r4, r3, [r0, #8] ; CHECK-NEXT: rsb r12, r12, r2, lsl #1 -; CHECK-NEXT: mov r2, r12 ; CHECK-NEXT: dlstp.16 lr, r12 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -7,7 +7,6 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body.i @@ -19,7 +18,6 @@ ; CHECK-NEXT: vmov s4, r1 ; 
CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 -; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -78,7 +78,6 @@ ; CHECK-NEXT: @ %bb.5: @ %do.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: bic r9, r7, #3 -; CHECK-NEXT: mov r7, r5 ; CHECK-NEXT: mov r4, r3 ; CHECK-NEXT: add.w r8, r0, r9, lsl #2 ; CHECK-NEXT: dlstp.32 lr, r5 Index: llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll +++ llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll @@ -24,7 +24,6 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: mov r4, r5 ; CHECK-NEXT: dlstp.32 lr, r5 ; CHECK-NEXT: .LBB0_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 @@ -127,7 +126,6 @@ ; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB1_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB1_2 Depth=1 @@ -272,7 +270,6 @@ ; CHECK-NEXT: mov r4, r10 ; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: mov r8, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB2_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB2_2 Depth=1 @@ -448,7 +445,6 @@ ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov q3, q0 -; CHECK-NEXT: mov r10, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB3_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB3_2 Depth=1 @@ -645,7 +641,6 @@ ; CHECK-NEXT: vmov q3, q1 ; CHECK-NEXT: vmov q2, q1 ; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: dlstp.32 
lr, r7 ; CHECK-NEXT: .LBB4_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB4_2 Depth=1 @@ -864,7 +859,6 @@ ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov q2, q1 -; CHECK-NEXT: mov r9, r7 ; CHECK-NEXT: dlstp.32 lr, r7 ; CHECK-NEXT: .LBB5_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB5_2 Depth=1 Index: llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -740,7 +740,6 @@ ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: mla r3, r11, r2, r0 -; CHECK-NEXT: mov r9, r2 ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: ldm.w sp, {r0, r5, r7} @ 12-byte Folded Reload ; CHECK-NEXT: dlstp.16 lr, r2 @@ -913,7 +912,6 @@ ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: mla r3, r11, r2, r0 -; CHECK-NEXT: mov r9, r2 ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: ldm.w sp, {r0, r5, r7} @ 12-byte Folded Reload ; CHECK-NEXT: dlstp.16 lr, r2 Index: llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -14,7 +14,6 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov.i32 q3, #0x4 -; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1