Index: llvm/lib/CodeGen/MachineLICM.cpp =================================================================== --- llvm/lib/CodeGen/MachineLICM.cpp +++ llvm/lib/CodeGen/MachineLICM.cpp @@ -314,19 +314,6 @@ INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) -/// Test if the given loop is the outer-most loop that has a unique predecessor. -static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { - // Check whether this loop even has a unique predecessor. - if (!CurLoop->getLoopPredecessor()) - return false; - // Ok, now check to see if any of its outer loops do. - for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) - if (L->getLoopPredecessor()) - return false; - // None of them did, so this is the outermost with a unique predecessor. - return true; -} - bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -372,13 +359,6 @@ CurPreheader = nullptr; ExitBlocks.clear(); - // If this is done before regalloc, only visit outer-most preheader-sporting - // loops. - if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { - Worklist.append(CurLoop->begin(), CurLoop->end()); - continue; - } - CurLoop->getExitBlocks(ExitBlocks); if (!PreRegAlloc) @@ -781,20 +761,28 @@ if (!Hoist(&MI, Preheader)) { // We have failed to hoist MI to outmost loop's preheader. If MI is in // subloop, try to hoist it to subloop's preheader. 
- MachineLoop *InnerMostLoop = MLI->getLoopFor(MI.getParent()); - MachineBasicBlock *InnerMostLoopPreheader = - InnerMostLoop->getLoopPreheader(); - if (CurLoop != InnerMostLoop && InnerMostLoopPreheader) { - std::swap(CurLoop, InnerMostLoop); - std::swap(CurPreheader, InnerMostLoopPreheader); - Hoist(&MI, CurPreheader); - std::swap(CurLoop, InnerMostLoop); - std::swap(CurPreheader, InnerMostLoopPreheader); + SmallVector<MachineLoop *> InnerLoopWorkList; + for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop; + L = L->getParentLoop()) + InnerLoopWorkList.push_back(L); + + MachineLoop *OutMostLoop = CurLoop; + MachineBasicBlock *OutMostLoopPreheader = CurPreheader; + while (!InnerLoopWorkList.empty()) { + CurLoop = InnerLoopWorkList.pop_back_val(); + CurPreheader = CurLoop->getLoopPreheader(); + if (CurPreheader) { + if (Hoist(&MI, CurPreheader)) + break; + } } - // When MI is hoisted to inner-most loop's preheader, we need to update - // reg pressure because we have already visited inner-most loop's - // preheader. + + // When MI is hoisted to inner loop's preheader, we need to update reg + // pressure because we have already visited inner loop's preheader. UpdateRegPressure(&MI); + + CurLoop = OutMostLoop; + CurPreheader = OutMostLoopPreheader; } // If we have hoisted an instruction that may store, it can only be a @@ -1325,9 +1313,15 @@ MachineInstr * MachineLICMBase::LookForDuplicate(const MachineInstr *MI, std::vector<MachineInstr *> &PrevMIs) { - for (MachineInstr *PrevMI : PrevMIs) + for (MachineInstr *PrevMI : PrevMIs) { + // PrevMI could be in inner loop's preheader so let's check PrevMI's block + // dominates MI's block. + if (!DT->dominates(PrevMI->getParent(), MI->getParent())) + continue; + if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? 
MRI : nullptr))) return PrevMI; + } return nullptr; } Index: llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -871,7 +871,7 @@ ; CHECK-NEXT: vdup.32 q0, r2 ; CHECK-NEXT: vldrw.u32 q1, [r7] ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: mov.w r9, #6 +; CHECK-NEXT: mov.w r10, #6 ; CHECK-NEXT: movs r6, #11 ; CHECK-NEXT: vshl.i32 q0, q0, #2 ; CHECK-NEXT: movs r5, #0 @@ -881,7 +881,7 @@ ; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 ; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: .LBB11_2: @ %for.cond22.preheader.i ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 @@ -889,33 +889,33 @@ ; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 ; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: vdup.32 q2, r9 +; CHECK-NEXT: vstrw.32 q2, [sp, #8] @ 16-byte Spill ; CHECK-NEXT: .LBB11_3: @ %for.body27.i ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 ; CHECK-NEXT: @ => This Loop Header: Depth=3 ; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: dls lr, r9 -; CHECK-NEXT: vdup.32 q2, r10 +; CHECK-NEXT: dls lr, r10 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: mov.w r11, #4 -; CHECK-NEXT: vdup.32 q3, r5 -; CHECK-NEXT: vstrw.32 q2, [sp, #8] @ 16-byte Spill +; CHECK-NEXT: vdup.32 q3, r7 ; CHECK-NEXT: .LBB11_4: @ %for.body78.us.i ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 ; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3 ; CHECK-NEXT: @ => This Loop Header: Depth=4 ; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 -; CHECK-NEXT: mul r7, r11, r6 +; 
CHECK-NEXT: mul r5, r11, r6 ; CHECK-NEXT: vmov q4, q3 -; CHECK-NEXT: vadd.i32 q5, q1, r7 +; CHECK-NEXT: vadd.i32 q5, q1, r5 ; CHECK-NEXT: vmla.i32 q4, q5, r2 ; CHECK-NEXT: vldrw.u32 q5, [sp, #8] @ 16-byte Reload -; CHECK-NEXT: adds r7, #113 -; CHECK-NEXT: vadd.i32 q6, q1, r7 -; CHECK-NEXT: mov r7, r8 +; CHECK-NEXT: adds r5, #113 +; CHECK-NEXT: vadd.i32 q6, q1, r5 +; CHECK-NEXT: mov r5, r8 ; CHECK-NEXT: vmla.i32 q5, q6, r2 ; CHECK-NEXT: .LBB11_5: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 @@ -928,7 +928,7 @@ ; CHECK-NEXT: vldrb.s32 q5, [r1, q4] ; CHECK-NEXT: vadd.i32 q6, q4, q0 ; CHECK-NEXT: vadd.i32 q2, q2, r2 -; CHECK-NEXT: subs r7, #4 +; CHECK-NEXT: subs r5, #4 ; CHECK-NEXT: vmlava.u32 r12, q2, q5 ; CHECK-NEXT: vmov q5, q7 ; CHECK-NEXT: vmov q4, q6 @@ -939,14 +939,14 @@ ; CHECK-NEXT: le lr, .LBB11_4 ; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup77.i ; CHECK-NEXT: @ in Loop: Header=BB11_3 Depth=3 -; CHECK-NEXT: adds r5, #1 +; CHECK-NEXT: adds r7, #1 ; CHECK-NEXT: adds r4, #1 -; CHECK-NEXT: cmp r5, r2 +; CHECK-NEXT: cmp r7, r2 ; CHECK-NEXT: bne .LBB11_3 ; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup26.i ; CHECK-NEXT: @ in Loop: Header=BB11_2 Depth=2 -; CHECK-NEXT: add.w r10, r10, #1 -; CHECK-NEXT: cmp r10, r3 +; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: cmp r9, r3 ; CHECK-NEXT: bne .LBB11_2 ; CHECK-NEXT: @ %bb.9: @ %for.cond.cleanup20.i ; CHECK-NEXT: @ in Loop: Header=BB11_1 Depth=1