Index: llvm/lib/Target/ARM/ARMBlockPlacement.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -41,6 +41,7 @@ bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); bool fixBackwardsWLS(MachineLoop *ML); bool processPostOrderLoops(MachineLoop *ML); + bool revertWhileToDo(MachineInstr *WLS, MachineLoop *ML); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -82,6 +83,51 @@ return nullptr; } +bool ARMBlockPlacement::revertWhileToDo(MachineInstr *WLS, MachineLoop *ML) { + // lr = t2WhileLoopStartTP r0, r1, TgtBB + // t2Br Ph + // -> + // cmp r0, 0 + // brcc TgtBB + // LR = t2DoLoopStartTP r0, r1 + // t2Br Ph + // FIXME: This requires the DLS/DLSTP to be a terminator, which isn't true for + // a DLS. For now we don't revert those, even though this code would + // theoretically work if it were not for machine verifier issues. + if (WLS->getOpcode() != ARM::t2WhileLoopStartTP) + return false; + + MachineBasicBlock *Preheader = WLS->getParent(); + assert(WLS != &Preheader->back()); + assert(WLS->getNextNode() == &Preheader->back()); + MachineInstr *Br = &Preheader->back(); + assert(Br->getOpcode() == ARM::t2B); + assert(Br->getOperand(1).getImm() == 14); + + // Clear the kill flags, as the cmp/bcc will no longer kill any operands. + WLS->getOperand(1).setIsKill(false); + if (WLS->getOpcode() == ARM::t2WhileLoopStartTP) + WLS->getOperand(2).setIsKill(false); + + // Create a new DLS to replace the WLS + MachineInstrBuilder MIB = + BuildMI(*Preheader, Br, WLS->getDebugLoc(), + TII->get(WLS->getOpcode() == ARM::t2WhileLoopStartTP + ? 
ARM::t2DoLoopStartTP + : ARM::t2DoLoopStart)); + MIB.add(WLS->getOperand(0)); + MIB.add(WLS->getOperand(1)); + if (WLS->getOpcode() == ARM::t2WhileLoopStartTP) + MIB.add(WLS->getOperand(2)); + + LLVM_DEBUG(dbgs() << DEBUG_PREFIX + << "Reverting While Loop to Do Loop: " << *WLS << "\n"); + + RevertWhileLoopStartLR(WLS, TII, ARM::t2Bcc, true); + + return true; +} + /// Checks if loop has a backwards branching WLS, and if possible, fixes it. /// This requires checking the predecessor (ie. preheader or it's predecessor) /// for a WLS and if its loopExit/target is before it. @@ -130,7 +176,7 @@ << "Can't move Predecessor" "block as it would convert a WLS from forward to a " "backwards branching WLS\n"); - return false; + return revertWhileToDo(WlsInstr, ML); } } } Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -6863,6 +6863,7 @@ let Inst{13} = 0b1; let Inst{11-1} = 0b00000000000; let Unpredictable{10-1} = 0b1111111111; + let isTerminator = 1; } class MVE_WLSTP size> Index: llvm/test/CodeGen/Thumb2/block-placement.mir =================================================================== --- llvm/test/CodeGen/Thumb2/block-placement.mir +++ llvm/test/CodeGen/Thumb2/block-placement.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement -verify-machineinstrs %s -o - | FileCheck %s --- | ; Checks that Predecessor gets moved (to before the LoopExit) if it contains a backward WLS. 
Index: llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll +++ llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll @@ -87,17 +87,13 @@ ; CHECK-NEXT: add.w r4, r0, #15 ; CHECK-NEXT: adds r3, #19 ; CHECK-NEXT: lsrs r4, r4, #4 -; CHECK-NEXT: subs.w lr, r4, #0 +; CHECK-NEXT: cmp.w r4, #0 ; CHECK-NEXT: beq .LBB0_2 -; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: dlstp.8 lr, r0 ; CHECK-NEXT: .LBB0_10: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vctp.8 r0 -; CHECK-NEXT: subs r0, #16 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q3, [r3], #16 -; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: bne .LBB0_10 +; CHECK-NEXT: vstrb.8 q3, [r3], #16 +; CHECK-NEXT: letp lr, .LBB0_10 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_11: ; CHECK-NEXT: movw r12, :lower16:arr_183 Index: llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -510,18 +510,13 @@ ; CHECK-NEXT: letp lr, .LBB18_8 ; CHECK-NEXT: .LBB18_9: @ %loop ; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1 -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: subs.w lr, r12, #0 +; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB18_2 -; CHECK-NEXT: b .LBB18_10 +; CHECK-NEXT: dlstp.8 lr, r0 ; CHECK-NEXT: .LBB18_10: @ Parent Loop BB18_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vctp.8 r1 -; CHECK-NEXT: subs r1, #16 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r3], #16 -; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: bne .LBB18_10 +; CHECK-NEXT: vstrb.8 q0, [r3], #16 +; CHECK-NEXT: letp lr, .LBB18_10 ; CHECK-NEXT: b .LBB18_2 ; CHECK-NEXT: .LBB18_11: @ %exit ; CHECK-NEXT: pop {r4, pc}