diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp --- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -36,7 +36,7 @@ ARMBlockPlacement() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; - void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After); + void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *Before); bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); bool fixBackwardsWLS(MachineLoop *ML); bool processPostOrderLoops(MachineLoop *ML); @@ -82,11 +82,11 @@ } /// Checks if loop has a backwards branching WLS, and if possible, fixes it. -/// This requires checking the preheader (or it's predecessor) for a WLS and if -/// its target is before it. -/// If moving the target block wouldn't produce another backwards WLS or a new -/// forwards LE branch, then move the target block after the preheader (or it's -/// predecessor). +/// This requires checking the predecessor (i.e. preheader or its predecessor) +/// for a WLS and if its loopExit/target is before it. +/// If moving the predecessor won't convert a WLS (to the predecessor) from +/// a forward to a backward branching WLS, then move the predecessor block +/// to before the loopExit/target. bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) { MachineInstr *WlsInstr = findWLS(ML); if (!WlsInstr) @@ -94,7 +94,8 @@ MachineBasicBlock *Predecessor = WlsInstr->getParent(); MachineBasicBlock *LoopExit = WlsInstr->getOperand(2).getMBB(); - // We don't want to move the function's entry block. + + // We don't want to move Predecessor to before the function's entry block. 
if (!LoopExit->getPrevNode()) return false; if (blockIsBefore(Predecessor, LoopExit)) @@ -103,77 +104,38 @@ << Predecessor->getFullName() << " to " << LoopExit->getFullName() << "\n"); - // Make sure that moving the target block doesn't cause any of its WLSs - // that were previously not backwards to become backwards - bool CanMove = true; - MachineInstr *WlsInLoopExit = findWLSInBlock(LoopExit); - if (WlsInLoopExit) { - // An example loop structure where the LoopExit can't be moved, since - // bb1's WLS will become backwards once it's moved after bb3 - // bb1: - LoopExit - // WLS bb2 - // bb2: - LoopExit2 - // ... - // bb3: - Predecessor - // WLS bb1 - // bb4: - Header - MachineBasicBlock *LoopExit2 = WlsInLoopExit->getOperand(2).getMBB(); - // If the WLS from LoopExit to LoopExit2 is already backwards then - // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is - // after the Predecessor then moving will keep it as a forward branch, so it - // can be moved. If LoopExit2 is between the Predecessor and LoopExit then - // moving LoopExit will make it a backwards branch, so it can't be moved - // since we'd fix one and introduce one backwards branch. - // TODO: Analyse the blocks to make a decision if it would be worth - // moving LoopExit even if LoopExit2 is between the Predecessor and - // LoopExit. - if (!blockIsBefore(LoopExit2, LoopExit) && - (LoopExit2 == Predecessor || blockIsBefore(LoopExit2, Predecessor))) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX - << "Can't move the target block as it would " - "introduce a new backwards WLS branch\n"); - CanMove = false; - } - } - - if (CanMove) { - // Make sure no LEs become forwards. - // An example loop structure where the LoopExit can't be moved, since - // bb2's LE will become forwards once bb1 is moved after bb3. 
- // bb1: - LoopExit - // bb2: - // LE bb1 - Terminator - // bb3: - Predecessor - // WLS bb1 - // bb4: - Header - for (auto It = LoopExit->getIterator(); It != Predecessor->getIterator(); - It++) { - MachineBasicBlock *MBB = &*It; - for (auto &Terminator : MBB->terminators()) { - if (Terminator.getOpcode() != ARM::t2LoopEnd && - Terminator.getOpcode() != ARM::t2LoopEndDec) - continue; - MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); - // The LE will become forwards branching if it branches to LoopExit - // which isn't allowed by the architecture, so we should avoid - // introducing these. - // TODO: Analyse the blocks to make a decision if it would be worth - // moving LoopExit even if we'd introduce a forwards LE - if (LETarget == LoopExit) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX - << "Can't move the target block as it would " - "introduce a new forwards LE branch\n"); - CanMove = false; - break; - } + // Make sure no forward branching WLSs to the Predecessor become backwards + // branching. An example loop structure where the Predecessor can't be moved, + // since bb2's WLS will become backwards once bb3 is moved before/above bb1. 
+ // + // bb1: - LoopExit + // bb2: + // WLS bb3 + // bb3: - Predecessor + // WLS bb1 + // bb4: - Header + for (auto It = ++LoopExit->getIterator(); It != Predecessor->getIterator(); + ++It) { + MachineBasicBlock *MBB = &*It; + for (auto &Terminator : MBB->terminators()) { + if (Terminator.getOpcode() != ARM::t2WhileLoopStartLR) + continue; + MachineBasicBlock *WLSTarget = Terminator.getOperand(2).getMBB(); + // TODO: Analyse the blocks to make a decision if it would be worth + // moving Preheader even if we'd introduce a backwards WLS + if (WLSTarget == Predecessor) { + LLVM_DEBUG( + dbgs() << DEBUG_PREFIX + << "Can't move Predecessor" + "block as it would convert a WLS from forward to a " + "backwards branching WLS\n"); + return false; } } } - if (CanMove) - moveBasicBlock(LoopExit, Predecessor); - - return CanMove; + moveBasicBlock(Predecessor, LoopExit); + return true; } /// Updates ordering (of WLS BB and their loopExits) in inner loops first @@ -212,18 +174,20 @@ return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB); } -/// Moves a given MBB to be positioned after another MBB while maintaining -/// existing control flow +// Moves a BasicBlock before another, without changing the control flow void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB, - MachineBasicBlock *After) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after " - << After->getName() << "\n"); + MachineBasicBlock *Before) { + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " before " + << Before->getName() << "\n"); MachineBasicBlock *BBPrevious = BB->getPrevNode(); assert(BBPrevious && "Cannot move the function entry basic block"); - MachineBasicBlock *AfterNext = After->getNextNode(); MachineBasicBlock *BBNext = BB->getNextNode(); - BB->moveAfter(After); + MachineBasicBlock *BeforePrev = Before->getPrevNode(); + assert(BeforePrev && + "Cannot move the given block to before the function entry block"); + MachineFunction *F = 
BB->getParent(); + BB->moveBefore(Before); // Since only the blocks are to be moved around (but the control flow must // not change), if there were any fall-throughs (to/from adjacent blocks), @@ -251,12 +215,14 @@ // Fix fall-through to the moved BB from the one that used to be before it. if (BBPrevious->isSuccessor(BB)) FixFallthrough(BBPrevious, BB); - // Fix fall through from the destination BB to the one that used to follow. - if (AfterNext && After->isSuccessor(AfterNext)) - FixFallthrough(After, AfterNext); + // Fix fall through to the destination BB from the one that was before it. + if (BeforePrev->isSuccessor(Before)) + FixFallthrough(BeforePrev, Before); // Fix fall through from the moved BB to the one that used to follow. if (BBNext && BB->isSuccessor(BBNext)) FixFallthrough(BB, BBNext); - BBUtils->adjustBBOffsetsAfter(After); + F->RenumberBlocks(); + BBUtils->computeAllBlockSizes(); + BBUtils->adjustBBOffsetsAfter(&F->front()); } diff --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir --- a/llvm/test/CodeGen/Thumb2/block-placement.mir +++ b/llvm/test/CodeGen/Thumb2/block-placement.mir @@ -1,51 +1,54 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s --- | - ; Checks that loopExitBlock gets moved (in forward direction) if there is a backwards WLS to it. - define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 { + + ; Checks that Predecessor gets moved (to before the LoopExit) if it contains a backward WLS. + define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) { entry: unreachable } - ; Checks that loopExitBlock does not get reordered (since it is entry block) even if there is a backwards WLS to it. 
- define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 { + ; Checks that Predecessor (containing a backwards WLS) does not get moved to before the loopExit if it is the entry block. + define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) { entry: unreachable } - ; Checks that loopExitBlock (containing a backwards WLS) is moved (in forward direction) if there is a backwards WLS to it. - define void @backwards_branch_target_already_backwards(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 { + ; Checks that Predecessor (to which a forward WLS exists) is not moved if moving it would cause the WLS to become backwards branching. + define void @backwards_branch_backwards_wls(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) { entry: unreachable } - define void @backwards_branch_sibling(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 { + ; Checks that a MachineFunction is unaffected if it doesn't contain any WLS (pseudo) instruction. + define void @no_predecessor(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) { entry: unreachable } - ; Checks that loopExitBlock (to which a backwards LE exists) is not moved if moving it would cause the LE to become forwards branching. - define void @backwards_branch_forwards_le(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 { + ; Within a nested (Both the WLS and loopExit are at depth=3 here) loop, checks that Predecessor + ; gets moved (in backward direction) if there exists a backwards WLS from it to the LoopExit. + define void @nested_loops(i32 %n, i32 %m, i32 %l, i8* noalias %X, i8* noalias %Y) { entry: unreachable } - ; Checks that a MachineFunction is unaffected if it doesn't contain any WLS (pseudo) instruction. 
- define void @no_preheader(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 { + ; Checks that Predecessor (to which a forward WLS exists) is moved if moving it would NOT cause the WLS + ; to become backwards branching. + define void @backwards_branch_forwards_wls(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) { entry: unreachable } - ; Within a nested loop, checks that loopExit gets moved (in forward direction) if there exists a backwards WLS to it. - ; Both the WLS and loopExit are at depth=3. - define void @nested_loops(i32 %n, i32 %m, i32 %l, i8* noalias %X, i8* noalias %Y) local_unnamed_addr #0 { + ; Checks that multiple predecessor case is handled appropriately + define void @multiple_predecessors(i32 %d, i32 %e, i32 %f) { entry: unreachable } - declare dso_local i32 @g(...) local_unnamed_addr #1 + declare dso_local i32 @g(...) - declare dso_local i32 @h(...) local_unnamed_addr #1 + declare dso_local i32 @h(...) ... 
--- @@ -53,22 +56,22 @@ body: | ; CHECK-LABEL: name: backwards_branch ; CHECK: bb.0: - ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: successors: %bb.1(0x80000000) ; CHECK: tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - ; CHECK: bb.2: + ; CHECK: bb.1: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.2, implicit-def dead $cpsr ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg - ; CHECK: bb.1: + ; CHECK: bb.2: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.3: - ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) + ; CHECK: successors: %bb.3(0x7c000000), %bb.2(0x04000000) ; CHECK: renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg ; CHECK: tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg bb.0: successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $lr @@ -144,199 +147,32 @@ ... 
--- -name: backwards_branch_target_already_backwards -body: | - ; CHECK-LABEL: name: backwards_branch_target_already_backwards - ; CHECK: bb.0: - ; CHECK: successors: %bb.2(0x50000000), %bb.1(0x30000000) - ; CHECK: tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2Bcc %bb.1, 11 /* CC::lt */, killed $cpsr - ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $lr, %bb.1, implicit-def dead $cpsr - ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg - ; CHECK: bb.1: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2IT 11, 8, implicit-def $itstate - ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.0, implicit-def dead $cpsr - ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg - ; CHECK: bb.3: - ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg - ; CHECK: bb.4: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - ; CHECK: bb.5: - ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg - ; CHECK: bb.6: - ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - bb.0: - successors: %bb.1(0x50000000), %bb.3(0x30000000) - liveins: 
$r0, $r1, $r2, $r3, $lr - - tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr - t2B %bb.1, 14 /* CC::al */, $noreg - - bb.3: - successors: %bb.4(0x80000000) - liveins: $r1, $r3 - - tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2IT 11, 8, implicit-def $itstate - frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.0, implicit-def dead $cpsr - t2B %bb.4, 14 /* CC::al */, $noreg - - bb.1: - successors: %bb.2(0x80000000) - liveins: $r0, $r1, $r2, $r3 - - $lr = tMOVr $r0, 14 /* CC::al */, $noreg - renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $lr, %bb.3, implicit-def dead $cpsr - - bb.2: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - liveins: $lr, $r0, $r1, $r3 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr - t2B %bb.3, 14 /* CC::al */, $noreg - - bb.4: - successors: %bb.5(0x80000000) - liveins: $r1, $r3 - - renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - - bb.5: - successors: %bb.5(0x7c000000), %bb.6(0x04000000) - liveins: $lr, $r0 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - t2B %bb.6, 14 /* CC::al */, $noreg - - bb.6: - frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - -... 
---- -name: backwards_branch_sibling -body: | - ; CHECK-LABEL: name: backwards_branch_sibling - ; CHECK: bb.0: - ; CHECK: successors: %bb.2(0x50000000), %bb.1(0x30000000) - ; CHECK: tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2Bcc %bb.1, 11 /* CC::lt */, killed $cpsr - ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg - ; CHECK: bb.1: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2IT 11, 8, implicit-def $itstate - ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.2, implicit-def dead $cpsr - ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $lr, %bb.1, implicit-def dead $cpsr - ; CHECK: bb.3: - ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg - ; CHECK: bb.4: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - ; CHECK: bb.5: - ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg - ; CHECK: bb.6: - ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - bb.0: - successors: %bb.1(0x50000000), %bb.3(0x30000000) - liveins: $r0, $r1, $r2, $r3, $lr - - tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, 
implicit-def $cpsr - t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr - t2B %bb.1, 14 /* CC::al */, $noreg - - bb.3: - successors: %bb.4(0x80000000) - liveins: $r1, $r3 - - tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2IT 11, 8, implicit-def $itstate - frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.1, implicit-def dead $cpsr - t2B %bb.4, 14 /* CC::al */, $noreg - - bb.1: - successors: %bb.2(0x80000000) - liveins: $r0, $r1, $r2, $r3 - - $lr = tMOVr $r0, 14 /* CC::al */, $noreg - renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $lr, %bb.3, implicit-def dead $cpsr - - bb.2: - successors: %bb.2(0x7c000000), %bb.3(0x04000000) - liveins: $lr, $r0, $r1, $r3 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr - t2B %bb.3, 14 /* CC::al */, $noreg - - bb.4: - successors: %bb.5(0x80000000) - liveins: $r1, $r3 - - renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg - $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr - - bb.5: - successors: %bb.5(0x7c000000), %bb.6(0x04000000) - liveins: $lr, $r0 - - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr - t2B %bb.6, 14 /* CC::al */, $noreg - - bb.6: - frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc -... 
---- -name: backwards_branch_forwards_le +name: backwards_branch_backwards_wls body: | - ; CHECK-LABEL: name: backwards_branch_forwards_le + ; CHECK-LABEL: name: backwards_branch_backwards_wls ; CHECK: bb.0: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate ; CHECK: bb.1: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + ; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr + ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg ; CHECK: bb.3: - ; CHECK: successors: %bb.3(0x7c000000), %bb.1(0x04000000) - ; CHECK: renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr + ; CHECK: successors: %bb.1(0x7c000000), %bb.4(0x04000000) + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK: bb.4: + ; CHECK: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg + ; CHECK: bb.5: + ; CHECK: successors: %bb.5(0x40000000), %bb.3(0x40000000) + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg bb.0: 
successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $lr @@ -346,29 +182,40 @@ frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate bb.1: - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.2: - successors: %bb.3(0x80000000) + successors: %bb.3(0x80000000), %bb.5(0x80000000) liveins: $r0, $r1, $r2 - $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr + t2B %bb.5, 14 /* CC::al */, $noreg bb.3: - successors: %bb.3(0x7c000000), %bb.1(0x04000000) + successors: %bb.1(0x7c000000), %bb.4(0x04000000) liveins: $lr, $r1, $r2 - renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg - tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg - renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr + $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr + t2B %bb.4, 14 /* CC::al */, $noreg + + bb.4: + successors: %bb.1, %bb.4 + liveins: $lr, $r1, $r2 + + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr t2B %bb.1, 14 /* CC::al */, $noreg + bb.5: + successors: %bb.5, %bb.3 + liveins: $lr, $r1, $r2 + + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr + t2B %bb.3, 14 /* CC::al */, $noreg ... 
--- -name: no_preheader +name: no_predecessor body: | - ; CHECK-LABEL: name: no_preheader + ; CHECK-LABEL: name: no_predecessor ; CHECK: bb.0: ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, $r7, killed $lr, implicit-def $sp, implicit $sp @@ -518,26 +365,26 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, killed $cpsr ; CHECK: bb.4: - ; CHECK: successors: %bb.6(0x80000000) + ; CHECK: successors: %bb.5(0x80000000) ; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12 ; CHECK: renamable $r4, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: $r10 = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: $r2 = tMOVr $r3, 14 /* CC::al */, $noreg - ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg - ; CHECK: bb.6: - ; CHECK: successors: %bb.7(0x50000000), %bb.5(0x30000000) + ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg + ; CHECK: bb.5: + ; CHECK: successors: %bb.7(0x50000000), %bb.6(0x30000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12 - ; CHECK: renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.5, implicit-def dead $cpsr + ; CHECK: renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.6, implicit-def dead $cpsr ; CHECK: t2B %bb.7, 14 /* CC::al */, $noreg - ; CHECK: bb.5: - ; CHECK: successors: %bb.2(0x04000000), %bb.6(0x7c000000) + ; CHECK: bb.6: + ; CHECK: successors: %bb.2(0x04000000), %bb.5(0x7c000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12 ; CHECK: renamable $r4, dead $cpsr = nuw nsw tADDi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: tCMPr renamable $r4, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $r10 = t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr - ; 
CHECK: t2B %bb.6, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg ; CHECK: bb.7: ; CHECK: successors: %bb.8(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12 @@ -545,11 +392,11 @@ ; CHECK: $r6 = tMOVr $r2, 14 /* CC::al */, $noreg ; CHECK: t2B %bb.8, 14 /* CC::al */, $noreg ; CHECK: bb.8: - ; CHECK: successors: %bb.8(0x7c000000), %bb.5(0x04000000) + ; CHECK: successors: %bb.8(0x7c000000), %bb.6(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r8, $r9, $r10, $r12 ; CHECK: tSTRi killed $r0, $r1, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.8, implicit-def dead $cpsr - ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg ; CHECK: bb.9: ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc bb.0: @@ -638,3 +485,184 @@ $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc ... 
+--- +name: backwards_branch_forwards_wls +body: | + ; CHECK-LABEL: name: backwards_branch_forwards_wls + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.2, implicit-def dead $cpsr + ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg + ; CHECK: bb.2: + ; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr + ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK: bb.3: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + ; CHECK: bb.4: + ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.5: + ; CHECK: successors: %bb.5(0x40000000), %bb.2(0x40000000) + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr + ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg + bb.0: + successors: %bb.2 + liveins: $r0, $r1, $r2, $lr + + tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.2: + successors: %bb.3, %bb.5 + liveins: $r0, $r1, $r2 + + $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr + t2B %bb.5, 14 /* CC::al */, $noreg + + bb.1: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + + bb.3: + successors: %bb.4, %bb.1 + liveins: $lr, $r1, $r2 + + $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, 
implicit-def dead $cpsr + t2B %bb.4, 14 /* CC::al */, $noreg + + bb.4: + successors: %bb.1, %bb.4 + liveins: $lr, $r1, $r2 + + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr + t2B %bb.1, 14 /* CC::al */, $noreg + + bb.5: + successors: %bb.5, %bb.3 + liveins: $lr, $r1, $r2 + + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr + t2B %bb.3, 14 /* CC::al */, $noreg + +... +--- +name: multiple_predecessors +body: | + ; CHECK-LABEL: name: multiple_predecessors + ; CHECK: bb.0: + ; CHECK: successors: %bb.3(0x55555555), %bb.2(0x2aaaaaab) + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 12 + ; CHECK: tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 8, implicit-def $itstate + ; CHECK: tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate + ; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.1: + ; CHECK: successors: %bb.6(0x40000000), %bb.3(0x40000000) + ; CHECK: renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.3, implicit-def dead $cpsr + ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg + ; CHECK: bb.2: + ; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK: renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.3, implicit-def dead $cpsr + ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK: bb.3: + ; CHECK: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0 + ; CHECK: bb.4: + ; CHECK: 
successors: %bb.5(0x80000000) + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r0 = IMPLICIT_DEF + ; CHECK: bb.5: + ; CHECK: successors: %bb.5(0x7c000000), %bb.1(0x04000000) + ; CHECK: renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0 + ; CHECK: MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr + ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg + ; CHECK: bb.6: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + ; CHECK: bb.7: + ; CHECK: successors: %bb.7(0x7c000000), %bb.3(0x04000000) + ; CHECK: renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0 + ; CHECK: MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.7, implicit-def dead $cpsr + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg + bb.0: + successors: %bb.7(0x80000000), %bb.1(0x40000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 12 + tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 0, 8, implicit-def $itstate + tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate + t2Bcc %bb.1, 0 /* CC::eq */, killed $cpsr + + bb.7: + $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, 
$noreg + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0 + + bb.1: + successors: %bb.3(0x40000000), %bb.7(0x40000000) + liveins: $r2 + + renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.7, implicit-def dead $cpsr + t2B %bb.3, 14 /* CC::al */, $noreg + + bb.3: + successors: %bb.4(0x80000000) + liveins: $lr, $r2 + + renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg + renamable $r0 = IMPLICIT_DEF + + bb.4: + successors: %bb.4(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1, $r2, $r3 + + renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0 + MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr + t2B %bb.2, 14 /* CC::al */, $noreg + + bb.2: + successors: %bb.5(0x40000000), %bb.7(0x40000000) + liveins: $r0, $r1, $r2 + + renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.7, implicit-def dead $cpsr + t2B %bb.5, 14 /* CC::al */, $noreg + + bb.5: + successors: %bb.6(0x80000000) + liveins: $lr, $r0, $r1 + + renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + + bb.6: + successors: %bb.6(0x7c000000), %bb.7(0x04000000) + liveins: $lr, $r0, $r1, $r3 + + renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0 + MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr + t2B %bb.7, 14 /* CC::al */, $noreg + +... 
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1077,10 +1077,23 @@ ; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_3: @ %while.body +; CHECK-NEXT: b .LBB16_5 +; CHECK-NEXT: .LBB16_3: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: wls lr, r0, .LBB16_4 +; CHECK-NEXT: b .LBB16_9 +; CHECK-NEXT: .LBB16_4: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: subs.w r12, r12, #1 +; CHECK-NEXT: vstrb.8 q0, [r2], #8 +; CHECK-NEXT: add.w r0, r5, r0, lsl #1 +; CHECK-NEXT: add.w r5, r0, #8 +; CHECK-NEXT: beq.w .LBB16_12 +; CHECK-NEXT: .LBB16_5: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_5 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_7 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: ldrh.w lr, [r3, #14] @@ -1117,14 +1130,14 @@ ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vfma.f16 q0, q1, lr ; CHECK-NEXT: cmp r0, #16 -; CHECK-NEXT: blo .LBB16_6 -; CHECK-NEXT: @ %bb.4: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: blo .LBB16_8 +; CHECK-NEXT: @ %bb.6: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: .LBB16_5: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: .LBB16_7: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r0, 
[r6], #16 ; CHECK-NEXT: vldrw.u32 q1, [r5] @@ -1155,39 +1168,26 @@ ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: adds r5, #16 ; CHECK-NEXT: vfma.f16 q0, q1, r4 -; CHECK-NEXT: le lr, .LBB16_5 -; CHECK-NEXT: b .LBB16_7 -; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: le lr, .LBB16_7 +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: .LBB16_7: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: wls lr, r0, .LBB16_8 -; CHECK-NEXT: b .LBB16_9 -; CHECK-NEXT: .LBB16_8: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: vstrb.8 q0, [r2], #8 -; CHECK-NEXT: add.w r0, r5, r0, lsl #1 -; CHECK-NEXT: add.w r5, r0, #8 -; CHECK-NEXT: beq .LBB16_12 ; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r4, [r6], #2 ; CHECK-NEXT: vldrh.u16 q1, [r0], #2 ; CHECK-NEXT: vfma.f16 q0, q1, r4 ; CHECK-NEXT: le lr, .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: add.w r5, r5, r0, lsl #1 -; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- 
a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1071,10 +1071,24 @@ ; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_3: @ %while.body +; CHECK-NEXT: b .LBB16_5 +; CHECK-NEXT: .LBB16_3: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload +; CHECK-NEXT: wls lr, r0, .LBB16_4 +; CHECK-NEXT: b .LBB16_9 +; CHECK-NEXT: .LBB16_4: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: subs.w r12, r12, #1 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 +; CHECK-NEXT: add.w r0, r4, r0, lsl #2 +; CHECK-NEXT: add.w r4, r0, #16 +; CHECK-NEXT: beq .LBB16_12 +; CHECK-NEXT: .LBB16_5: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_5 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_7 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 ; CHECK-NEXT: add.w lr, r10, #8 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 @@ -1101,14 +1115,14 @@ ; CHECK-NEXT: vfma.f32 q0, q3, r11 ; CHECK-NEXT: cmp r0, #16 ; CHECK-NEXT: vfma.f32 q0, q1, r8 -; CHECK-NEXT: blo .LBB16_6 -; CHECK-NEXT: @ %bb.4: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: blo .LBB16_8 +; CHECK-NEXT: @ %bb.6: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_5: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: .LBB16_7: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11} ; CHECK-NEXT: vldrw.u32 q1, [r4], #32 @@ 
-1129,40 +1143,26 @@ ; CHECK-NEXT: vfma.f32 q0, q2, r11 ; CHECK-NEXT: vfma.f32 q0, q3, r9 ; CHECK-NEXT: vfma.f32 q0, q1, r1 -; CHECK-NEXT: le lr, .LBB16_5 -; CHECK-NEXT: b .LBB16_7 -; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: le lr, .LBB16_7 +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_7: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload -; CHECK-NEXT: wls lr, r0, .LBB16_8 -; CHECK-NEXT: b .LBB16_9 -; CHECK-NEXT: .LBB16_8: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: add.w r0, r4, r0, lsl #2 -; CHECK-NEXT: add.w r4, r0, #16 -; CHECK-NEXT: beq .LBB16_12 ; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldr r0, [r7], #4 ; CHECK-NEXT: vldrw.u32 q1, [r3], #4 ; CHECK-NEXT: vfma.f32 q0, q1, r0 ; CHECK-NEXT: le lr, .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: add.w r4, r4, r0, lsl #2 -; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}