diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -630,25 +630,6 @@
     return false;
   }
 
-  // Check that creating a [W|D]LSTP, which will define LR with an element
-  // count instead of iteration count, won't affect any other instructions
-  // than the LoopStart and LoopDec.
-  // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
-  Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
-                                  : Start->getOperand(0).getReg();
-  if (StartInsertPt == Start && StartReg == ARM::LR) {
-    if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
-      SmallPtrSet<MachineInstr *, 2> Uses;
-      RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
-      for (auto *Use : Uses) {
-        if (Use != Start && Use != Dec) {
-          LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
-          return false;
-        }
-      }
-    }
-  }
-
   // For tail predication, we need to provide the number of elements, instead
   // of the iteration count, to the loop start instruction. The number of
   // elements is provided to the vctp instruction, so we need to check that
@@ -1410,8 +1391,7 @@
 
   LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
 
-  MachineInstr *Def =
-      RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
+  MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 1);
   if (!Def) {
     LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
     return;
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
@@ -142,21 +142,18 @@
   ; CHECK:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
   ; CHECK:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK:   liveins: $r0, $r1, $r2
   ; CHECK:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
   ; CHECK:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
+  ; CHECK:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.do.end:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -246,21 +243,18 @@
   ; CHECK:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
   ; CHECK:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK:   liveins: $r0, $r1, $r2
   ; CHECK:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
   ; CHECK:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
+  ; CHECK:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.do.end:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
@@ -146,8 +146,6 @@
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -16
   ; CHECK:   renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.5)
   ; CHECK:   renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r5, dead $cpsr = tADDi3 renamable $r4, 3, 14 /* CC::al */, $noreg
-  ; CHECK:   dead renamable $r5, dead $cpsr = tLSRri killed renamable $r5, 2, 14 /* CC::al */, $noreg
   ; CHECK:   $lr = MVE_WLSTP_32 killed renamable $r4, %bb.3
   ; CHECK: bb.1.for.body.lr.ph:
   ; CHECK:   successors: %bb.2(0x80000000)
diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -325,20 +325,13 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    add.w r1, r2, #15
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
-; CHECK-NEXT:    bic r1, r1, #16
 ; CHECK-NEXT:    mov r3, r2
-; CHECK-NEXT:    lsr.w lr, r1, #4
 ; CHECK-NEXT:    mov r1, r0
-; CHECK-NEXT:    mov r12, lr
-; CHECK-NEXT:    wls lr, lr, .LBB13_2
+; CHECK-NEXT:    wlstp.8 lr, r3, .LBB13_2
 ; CHECK-NEXT:  .LBB13_1: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vctp.8 r3
-; CHECK-NEXT:    subs r3, #16
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vstrbt.8 q0, [r1], #16
-; CHECK-NEXT:    le lr, .LBB13_1
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
+; CHECK-NEXT:    letp lr, .LBB13_1
 ; CHECK-NEXT:  .LBB13_2: @ %entry
 ; CHECK-NEXT:    wlstp.8 lr, r2, .LBB13_4
 ; CHECK-NEXT:  .LBB13_3: @ =>This Inner Loop Header: Depth=1
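
Note on what the test updates show (an illustrative sketch, not part of the patch): without tail predication the preheader has to turn the element count into an iteration count and a VCTP inside the loop regenerates the lane predicate, whereas DLSTP/WLSTP consume the element count directly and LETP predicates the final partial iteration, so the removed scalar setup and the VCTP/VPST sequences become unnecessary. Roughly, with 16 byte lanes per vector for the .8 loops above (4 lanes for the _32 loops):

#include <cstdint>

// Illustrative only: the iteration count a plain [W|D]LS loop needs versus
// the element count consumed by [W|D]LSTP and VCTP.
constexpr uint32_t iterationCount(uint32_t Elements, uint32_t Lanes) {
  return (Elements + Lanes - 1) / Lanes; // ceil(Elements / Lanes)
}
// e.g. iterationCount(100, 16) == 7: six full vectors plus one predicated tail.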