diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp --- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -36,7 +36,7 @@ ARMBlockPlacement() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; - void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After); + void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *Before); bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); bool fixBackwardsWLS(MachineLoop *ML); bool processPostOrderLoops(MachineLoop *ML); @@ -82,11 +82,11 @@ } /// Checks if loop has a backwards branching WLS, and if possible, fixes it. -/// This requires checking the preheader (or it's predecessor) for a WLS and if -/// its target is before it. -/// If moving the target block wouldn't produce another backwards WLS or a new -/// forwards LE branch, then move the target block after the preheader (or it's -/// predecessor). +/// This requires checking the predecessor (ie. preheader or it's predecessor) +/// for a WLS and if its loopExit/target is before it. +/// If moving the predecessor won't convert a WLS (to the predecessor) from +/// a forward to a backward branching WLS, then move the predecessor block +/// to before the loopExit/target. bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) { MachineInstr *WlsInstr = findWLS(ML); if (!WlsInstr) @@ -95,6 +95,9 @@ MachineBasicBlock *Predecessor = WlsInstr->getParent(); MachineBasicBlock *LoopExit = WlsInstr->getOperand(2).getMBB(); // We don't want to move the function's entry block. + if (!Predecessor->getPrevNode()) + return false; + // We don't want to move to before the function's entry block. if (!LoopExit->getPrevNode()) return false; if (blockIsBefore(Predecessor, LoopExit)) @@ -103,77 +106,37 @@ << Predecessor->getFullName() << " to " << LoopExit->getFullName() << "\n"); - // Make sure that moving the target block doesn't cause any of its WLSs - // that were previously not backwards to become backwards - bool CanMove = true; - MachineInstr *WlsInLoopExit = findWLSInBlock(LoopExit); - if (WlsInLoopExit) { - // An example loop structure where the LoopExit can't be moved, since - // bb1's WLS will become backwards once it's moved after bb3 - // bb1: - LoopExit - // WLS bb2 - // bb2: - LoopExit2 - // ... - // bb3: - Predecessor - // WLS bb1 - // bb4: - Header - MachineBasicBlock *LoopExit2 = WlsInLoopExit->getOperand(2).getMBB(); - // If the WLS from LoopExit to LoopExit2 is already backwards then - // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is - // after the Predecessor then moving will keep it as a forward branch, so it - // can be moved. If LoopExit2 is between the Predecessor and LoopExit then - // moving LoopExit will make it a backwards branch, so it can't be moved - // since we'd fix one and introduce one backwards branch. - // TODO: Analyse the blocks to make a decision if it would be worth - // moving LoopExit even if LoopExit2 is between the Predecessor and - // LoopExit. - if (!blockIsBefore(LoopExit2, LoopExit) && - (LoopExit2 == Predecessor || blockIsBefore(LoopExit2, Predecessor))) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX - << "Can't move the target block as it would " - "introduce a new backwards WLS branch\n"); - CanMove = false; - } - } - - if (CanMove) { - // Make sure no LEs become forwards. - // An example loop structure where the LoopExit can't be moved, since - // bb2's LE will become forwards once bb1 is moved after bb3. - // bb1: - LoopExit - // bb2: - // LE bb1 - Terminator - // bb3: - Predecessor - // WLS bb1 - // bb4: - Header - for (auto It = LoopExit->getIterator(); It != Predecessor->getIterator(); - It++) { - MachineBasicBlock *MBB = &*It; - for (auto &Terminator : MBB->terminators()) { - if (Terminator.getOpcode() != ARM::t2LoopEnd && - Terminator.getOpcode() != ARM::t2LoopEndDec) - continue; - MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); - // The LE will become forwards branching if it branches to LoopExit - // which isn't allowed by the architecture, so we should avoid - // introducing these. - // TODO: Analyse the blocks to make a decision if it would be worth - // moving LoopExit even if we'd introduce a forwards LE - if (LETarget == LoopExit) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX - << "Can't move the target block as it would " - "introduce a new forwards LE branch\n"); - CanMove = false; - break; - } + // Make sure no forward branching WLSs to the Predecessor become backwards branching. + // An example loop structure where the Predecessor can't be moved, since + // bb2's WLS will become forwards once bb3 is moved before/above bb1. + // + // bb1: - LoopExit + // bb2: + // WLS bb3 + // bb3: - Predecessor + // WLS bb1 + // bb4: - Header + for (auto It = ++LoopExit->getIterator(); It != Predecessor->getIterator(); + ++It) { + MachineBasicBlock *MBB = &*It; + for (auto &Terminator : MBB->terminators()) { + if (Terminator.getOpcode() != ARM::t2WhileLoopStartLR) + continue; + MachineBasicBlock *WLSTarget = Terminator.getOperand(2).getMBB(); + // TODO: Analyse the blocks to make a decision if it would be worth + // moving Preheader even if we'd introduce a backwards WLS + if (WLSTarget == Predecessor) { + LLVM_DEBUG( + dbgs() << DEBUG_PREFIX + << "Can't move the Predecessor block as it would convert a WLS" + "from forward branching to a backwards branching WLS\n"); + return false; } } } - if (CanMove) - moveBasicBlock(LoopExit, Predecessor); - - return CanMove; + moveBasicBlock(Predecessor, LoopExit); + return true; } /// Updates ordering (of WLS BB and their loopExits) in inner loops first @@ -212,18 +175,20 @@ return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB); } -/// Moves a given MBB to be positioned after another MBB while maintaining -/// existing control flow +// Moves a BasicBlock before another, without changing the control flow void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB, - MachineBasicBlock *After) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after " - << After->getName() << "\n"); + MachineBasicBlock *Before) { + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " before " + << Before->getName() << "\n"); MachineBasicBlock *BBPrevious = BB->getPrevNode(); assert(BBPrevious && "Cannot move the function entry basic block"); - MachineBasicBlock *AfterNext = After->getNextNode(); MachineBasicBlock *BBNext = BB->getNextNode(); - BB->moveAfter(After); + MachineBasicBlock *BeforePrev = Before->getPrevNode(); + assert(BeforePrev && + "Cannot move the given block to before the function entry block"); + + BB->moveBefore(Before); // Since only the blocks are to be moved around (but the control flow must // not change), if there were any fall-throughs (to/from adjacent blocks), @@ -253,12 +218,12 @@ // Fix fall-through to the moved BB from the one that used to be before it. if (BBPrevious->isSuccessor(BB)) FixFallthrough(BBPrevious, BB); - // Fix fall through from the destination BB to the one that used to follow. - if (AfterNext && After->isSuccessor(AfterNext)) - FixFallthrough(After, AfterNext); + // Fix fall through from the destination BB to the one that used to before it. + if (BeforePrev->isSuccessor(Before)) + FixFallthrough(BeforePrev, Before); // Fix fall through from the moved BB to the one that used to follow. if (BBNext && BB->isSuccessor(BBNext)) FixFallthrough(BB, BBNext); - BBUtils->adjustBBOffsetsAfter(After); + BBUtils->adjustBBOffsetsAfter(BeforePrev); } diff --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir --- a/llvm/test/CodeGen/Thumb2/block-placement.mir +++ b/llvm/test/CodeGen/Thumb2/block-placement.mir @@ -1,31 +1,20 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s --- | - ; Checks that loopExitBlock gets moved (in forward direction) if there is a backwards WLS to it. + ; Checks that PreHeader gets moved (in backward direction) if it contains a backward WLS. define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 { entry: unreachable } - ; Checks that loopExitBlock does not get reordered (since it is entry block) even if there is a backwards WLS to it. + ; Checks that Preheader does not get moved to before the loopExit (since it is entry block), even if there's a backwards WLS to it. define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 { entry: unreachable } - ; Checks that loopExitBlock (containing a backwards WLS) is moved (in forward direction) if there is a backwards WLS to it. - define void @backwards_branch_target_already_backwards(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 { - entry: - unreachable - } - - define void @backwards_branch_sibling(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 { - entry: - unreachable - } - - ; Checks that loopExitBlock (to which a backwards LE exists) is not moved if moving it would cause the LE to become forwards branching. - define void @backwards_branch_forwards_le(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 { + ; Checks that Preheader (to which a forward WLS exists) is not moved if moving it would cause the WLS to become backwards branching. + define void @backwards_branch_backwards_wls(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 { entry: unreachable } @@ -144,198 +133,22 @@ ... --- -name: backwards_branch_target_already_backwards +name: backwards_branch_backwards_wls body: | - ; CHECK-LABEL: name: backwards_branch_target_already_backwards - ; CHECK: bb.0: - ; CHECK: successors: %bb.2(0x50000000), %bb.1(0x30000000) - ; CHECK: tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2Bcc %bb.1, 11 /* CC::lt */, killed $cpsr - ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $lr, %bb.1, implicit-def dead $cpsr - ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg - ; CHECK: bb.1: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2IT 11, 8, implicit-def $itstate - ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.0, implicit-def dead $cpsr - ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg - ; CHECK: bb.3: - ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg - ; CHECK: bb.4: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - ; CHECK: bb.5: - ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg - ; CHECK: bb.6: - ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - bb.0: - successors: %bb.1(0x50000000), %bb.3(0x30000000) - liveins: $r0, $r1, $r2, $r3, $lr - - tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr - t2B %bb.1, 14 /* CC::al */, $noreg - - bb.3: - successors: %bb.4(0x80000000) - liveins: $r1, $r3 - - tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2IT 11, 8, implicit-def $itstate - frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.0, implicit-def dead $cpsr - t2B %bb.4, 14 /* CC::al */, $noreg - - bb.1: - successors: %bb.2(0x80000000) - liveins: $r0, $r1, $r2, $r3 - - $lr = tMOVr $r0, 14 /* CC::al */, $noreg - renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $lr, %bb.3, implicit-def dead $cpsr - - bb.2: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - liveins: $lr, $r0, $r1, $r3 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr - t2B %bb.3, 14 /* CC::al */, $noreg - - bb.4: - successors: %bb.5(0x80000000) - liveins: $r1, $r3 - - renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - - bb.5: - successors: %bb.5(0x7c000000), %bb.6(0x04000000) - liveins: $lr, $r0 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - t2B %bb.6, 14 /* CC::al */, $noreg - - bb.6: - frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - -... ---- -name: backwards_branch_sibling -body: | - ; CHECK-LABEL: name: backwards_branch_sibling - ; CHECK: bb.0: - ; CHECK: successors: %bb.2(0x50000000), %bb.1(0x30000000) - ; CHECK: tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2Bcc %bb.1, 11 /* CC::lt */, killed $cpsr - ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg - ; CHECK: bb.1: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2IT 11, 8, implicit-def $itstate - ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.2, implicit-def dead $cpsr - ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $lr, %bb.1, implicit-def dead $cpsr - ; CHECK: bb.3: - ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg - ; CHECK: bb.4: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - ; CHECK: bb.5: - ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg - ; CHECK: bb.6: - ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - bb.0: - successors: %bb.1(0x50000000), %bb.3(0x30000000) - liveins: $r0, $r1, $r2, $r3, $lr - - tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr - t2B %bb.1, 14 /* CC::al */, $noreg - - bb.3: - successors: %bb.4(0x80000000) - liveins: $r1, $r3 - - tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2IT 11, 8, implicit-def $itstate - frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.1, implicit-def dead $cpsr - t2B %bb.4, 14 /* CC::al */, $noreg - - bb.1: - successors: %bb.2(0x80000000) - liveins: $r0, $r1, $r2, $r3 - - $lr = tMOVr $r0, 14 /* CC::al */, $noreg - renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $lr, %bb.3, implicit-def dead $cpsr - - bb.2: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - liveins: $lr, $r0, $r1, $r3 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr - t2B %bb.3, 14 /* CC::al */, $noreg - - bb.4: - successors: %bb.5(0x80000000) - liveins: $r1, $r3 - - renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - - bb.5: - successors: %bb.5(0x7c000000), %bb.6(0x04000000) - liveins: $lr, $r0 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - t2B %bb.6, 14 /* CC::al */, $noreg - - bb.6: - frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc -... ---- -name: backwards_branch_forwards_le -body: | - ; CHECK-LABEL: name: backwards_branch_forwards_le + ; CHECK-LABEL: name: backwards_branch_backwards_wls ; CHECK: bb.0: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate ; CHECK: bb.1: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr ; CHECK: bb.3: ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) - ; CHECK: renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg bb.0: successors: %bb.2(0x80000000) @@ -346,22 +159,19 @@ frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate bb.1: - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.2: successors: %bb.3(0x80000000) liveins: $r0, $r1, $r2 - $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr bb.3: successors: %bb.3(0x7c000000), %bb.1(0x04000000) liveins: $lr, $r1, $r2 - renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg - tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr + $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr t2B %bb.1, 14 /* CC::al */, $noreg ... diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1077,10 +1077,23 @@ ; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_3: @ %while.body +; CHECK-NEXT: b .LBB16_5 +; CHECK-NEXT: .LBB16_3: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: wls lr, r0, .LBB16_4 +; CHECK-NEXT: b .LBB16_9 +; CHECK-NEXT: .LBB16_4: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: subs.w r12, r12, #1 +; CHECK-NEXT: vstrb.8 q0, [r2], #8 +; CHECK-NEXT: add.w r0, r5, r0, lsl #1 +; CHECK-NEXT: add.w r5, r0, #8 +; CHECK-NEXT: beq.w .LBB16_12 +; CHECK-NEXT: .LBB16_5: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_5 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_7 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: ldrh.w lr, [r3, #14] @@ -1117,14 +1130,14 @@ ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vfma.f16 q0, q1, lr ; CHECK-NEXT: cmp r0, #16 -; CHECK-NEXT: blo .LBB16_6 -; CHECK-NEXT: @ %bb.4: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: blo .LBB16_8 +; CHECK-NEXT: @ %bb.6: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: .LBB16_5: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: .LBB16_7: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r0, [r6], #16 ; CHECK-NEXT: vldrw.u32 q1, [r5] @@ -1155,39 +1168,26 @@ ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: adds r5, #16 ; CHECK-NEXT: vfma.f16 q0, q1, r4 -; CHECK-NEXT: le lr, .LBB16_5 -; CHECK-NEXT: b .LBB16_7 -; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: le lr, .LBB16_7 +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: .LBB16_7: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: wls lr, r0, .LBB16_8 -; CHECK-NEXT: b .LBB16_9 -; CHECK-NEXT: .LBB16_8: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: vstrb.8 q0, [r2], #8 -; CHECK-NEXT: add.w r0, r5, r0, lsl #1 -; CHECK-NEXT: add.w r5, r0, #8 -; CHECK-NEXT: beq .LBB16_12 ; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r4, [r6], #2 ; CHECK-NEXT: vldrh.u16 q1, [r0], #2 ; CHECK-NEXT: vfma.f16 q0, q1, r4 ; CHECK-NEXT: le lr, .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: add.w r5, r5, r0, lsl #1 -; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1071,10 +1071,24 @@ ; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_3: @ %while.body +; CHECK-NEXT: b .LBB16_5 +; CHECK-NEXT: .LBB16_3: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload +; CHECK-NEXT: wls lr, r0, .LBB16_4 +; CHECK-NEXT: b .LBB16_9 +; CHECK-NEXT: .LBB16_4: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: subs.w r12, r12, #1 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 +; CHECK-NEXT: add.w r0, r4, r0, lsl #2 +; CHECK-NEXT: add.w r4, r0, #16 +; CHECK-NEXT: beq .LBB16_12 +; CHECK-NEXT: .LBB16_5: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_5 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_7 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 ; CHECK-NEXT: add.w lr, r10, #8 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 @@ -1101,14 +1115,14 @@ ; CHECK-NEXT: vfma.f32 q0, q3, r11 ; CHECK-NEXT: cmp r0, #16 ; CHECK-NEXT: vfma.f32 q0, q1, r8 -; CHECK-NEXT: blo .LBB16_6 -; CHECK-NEXT: @ %bb.4: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: blo .LBB16_8 +; CHECK-NEXT: @ %bb.6: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_5: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: .LBB16_7: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11} ; CHECK-NEXT: vldrw.u32 q1, [r4], #32 @@ -1129,40 +1143,26 @@ ; CHECK-NEXT: vfma.f32 q0, q2, r11 ; CHECK-NEXT: vfma.f32 q0, q3, r9 ; CHECK-NEXT: vfma.f32 q0, q1, r1 -; CHECK-NEXT: le lr, .LBB16_5 -; CHECK-NEXT: b .LBB16_7 -; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: le lr, .LBB16_7 +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_7: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload -; CHECK-NEXT: wls lr, r0, .LBB16_8 -; CHECK-NEXT: b .LBB16_9 -; CHECK-NEXT: .LBB16_8: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: add.w r0, r4, r0, lsl #2 -; CHECK-NEXT: add.w r4, r0, #16 -; CHECK-NEXT: beq .LBB16_12 ; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldr r0, [r7], #4 ; CHECK-NEXT: vldrw.u32 q1, [r3], #4 ; CHECK-NEXT: vfma.f32 q0, q1, r0 ; CHECK-NEXT: le lr, .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: add.w r4, r4, r0, lsl #2 -; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}