[MachineLICM] Visit every loop of a nest when running before register allocation.

The new helper addSubLoopsToWorkList() queues a loop and, for pre-RA LICM, all
loops nested inside it. runOnMachineFunction() seeds its worklist through that
helper and no longer skips loops for which LoopIsOuterMostWithPredecessor()
returns false. The AArch64 test now expects `dup v0.8h, w15` in %vector.ph
(.LBB0_5) instead of in %vector.body (.LBB0_6).

NOTE(review): this removes a call to LoopIsOuterMostWithPredecessor(); if that
was its only use, the static helper must be deleted as well to avoid an
unused-function warning -- confirm against the full file.
NOTE(review): leading whitespace on context lines follows LLVM formatting
conventions; apply with `patch -l` if the tree differs in whitespace only.

Index: llvm/lib/CodeGen/MachineLICM.cpp
===================================================================
--- llvm/lib/CodeGen/MachineLICM.cpp
+++ llvm/lib/CodeGen/MachineLICM.cpp
@@ -327,6 +327,21 @@
   return true;
 }
 
+/// Append \p Loop to \p Worklist. When \p PreRA is set, also append every loop
+/// nested inside it, recursively, each parent ahead of its children.
+static void addSubLoopsToWorkList(MachineLoop *Loop,
+                                  SmallVectorImpl<MachineLoop *> &Worklist,
+                                  bool PreRA) {
+  Worklist.push_back(Loop);
+
+  // Only pre-RA LICM descends into sub-loops; otherwise \p Loop alone is
+  // queued.
+  if (!PreRA)
+    return;
+  for (MachineLoop *SubLoop : *Loop)
+    addSubLoopsToWorkList(SubLoop, Worklist, PreRA);
+}
+
 bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -366,19 +381,18 @@
   DT = &getAnalysis<MachineDominatorTree>();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
-  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+  SmallVector<MachineLoop *, 8> Worklist;
+
+  // Seed the worklist from the top-level loops; before register allocation
+  // the helper queues their nested loops as well.
+  for (MachineLoop *TopLevelLoop : *MLI)
+    addSubLoopsToWorkList(TopLevelLoop, Worklist, PreRegAlloc);
+
   while (!Worklist.empty()) {
     CurLoop = Worklist.pop_back_val();
     CurPreheader = nullptr;
     ExitBlocks.clear();
 
-    // If this is done before regalloc, only visit outer-most preheader-sporting
-    // loops.
-    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
-      Worklist.append(CurLoop->begin(), CurLoop->end());
-      continue;
-    }
-
     CurLoop->getExitBlocks(ExitBlocks);
 
     if (!PreRegAlloc)
Index: llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
===================================================================
--- llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
+++ llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
@@ -38,6 +38,7 @@
 ; CHECK-NEXT:    .p2align 5, , 16
 ; CHECK-NEXT:  .LBB0_5: // %vector.ph
 ; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    dup v0.8h, w15
 ; CHECK-NEXT:    mov x16, x14
 ; CHECK-NEXT:    mov x17, x12
 ; CHECK-NEXT:    mov x18, x11
@@ -45,9 +46,8 @@
 ; CHECK-NEXT:  .LBB0_6: // %vector.body
 ; CHECK-NEXT:    // Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    dup v0.8h, w15
-; CHECK-NEXT:    subs x18, x18, #16
 ; CHECK-NEXT:    ldp q1, q2, [x16, #-16]
+; CHECK-NEXT:    subs x18, x18, #16
 ; CHECK-NEXT:    add x16, x16, #32
 ; CHECK-NEXT:    ldp q4, q3, [x17, #-32]
 ; CHECK-NEXT:    smlal v4.4s, v0.4h, v1.4h