Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1015,9 +1015,11 @@ // Find a suitable position to insert the loop start instruction. It needs to // be able to safely define LR. auto FindStartInsertionPoint = [](MachineInstr *Start, + MachineInstr *Dec, MachineBasicBlock::iterator &InsertPt, MachineBasicBlock *&InsertBB, - ReachingDefAnalysis &RDA) { + ReachingDefAnalysis &RDA, + InstSet &ToRemove) { // We can define LR because LR already contains the same value. if (Start->getOperand(0).getReg() == ARM::LR) { InsertPt = MachineBasicBlock::iterator(Start); @@ -1033,23 +1035,29 @@ MI->getOperand(2).getImm() == ARMCC::AL; }; - MachineBasicBlock *MBB = Start->getParent(); - // Find an insertion point: // - Is there a (mov lr, Count) before Start? If so, and nothing else - // writes to Count before Start, we can insert at that mov. + // writes to Count before Start, we can insert at start. if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) { if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) { - InsertPt = MachineBasicBlock::iterator(LRDef); - InsertBB = LRDef->getParent(); + SmallPtrSet<MachineInstr *, 2> Ignore = { Dec }; + if (!TryRemove(LRDef, RDA, ToRemove, Ignore)) + return false; + InsertPt = MachineBasicBlock::iterator(Start); + InsertBB = Start->getParent(); return true; } } // - Is there a (mov lr, Count) after Start? If so, and nothing else writes - // to Count after Start, we can insert at that mov. + // to Count after Start, we can insert at that mov (which will now be + // dead). 
+ MachineBasicBlock *MBB = Start->getParent(); if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) { if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) { + SmallPtrSet<MachineInstr *, 2> Ignore = { Start, Dec }; + if (!TryRemove(LRDef, RDA, ToRemove, Ignore)) + return false; InsertPt = MachineBasicBlock::iterator(LRDef); InsertBB = LRDef->getParent(); return true; @@ -1066,7 +1074,8 @@ return true; }; - if (!FindStartInsertionPoint(Start, StartInsertPt, StartInsertBB, RDA)) { + if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, + ToRemove)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; return; @@ -1411,9 +1420,6 @@ // Collect and remove the users of iteration count. SmallPtrSet<MachineInstr *, 3> Killed = { LoLoop.Start, LoLoop.Dec, LoLoop.End }; - if (LoLoop.StartInsertPt != LoLoop.StartInsertBB->end()) - Killed.insert(&*LoLoop.StartInsertPt); - if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); } @@ -1439,9 +1445,6 @@ if (!IsDo) MIB.add(Start->getOperand(1)); - // If we're inserting at a mov lr, then remove it as it's redundant. 
- if (InsertPt != MBB->end()) - LoLoop.ToRemove.insert(&*InsertPt); LoLoop.ToRemove.insert(Start); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); return &*MIB; Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir @@ -78,12 +78,12 @@ ; CHECK: successors: %bb.5(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2DLS killed $r4 ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0 ; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: bb.5: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir @@ -273,7 +273,6 @@ ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep617) ; CHECK: renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep418) - ; CHECK: dead $r12 = tMOVr $lr, 14 
/* CC::al */, $noreg ; CHECK: renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep219) Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -175,8 +175,8 @@ ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0 + ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -26,10 +26,10 @@ ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 -; CHECK-NEXT: dls lr, r4 ; CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 +; CHECK-NEXT: dls lr, r4 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 Index: llvm/test/CodeGen/Thumb2/mve-float32regloops.ll 
=================================================================== --- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1435,8 +1435,8 @@ ; CHECK-NEXT: vdup.32 q1, r6 ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: vmov.f32 s6, s12 -; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: vmov.f32 s10, s14 +; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: vmov.f32 s7, s12 ; CHECK-NEXT: vmov.f32 s11, s14 ; CHECK-NEXT: .LBB17_3: @ Parent Loop BB17_2 Depth=1