Index: llvm/lib/Target/ARM/ARMBlockPlacement.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -41,6 +41,7 @@ bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); bool fixBackwardsWLS(MachineLoop *ML); bool processPostOrderLoops(MachineLoop *ML); + bool revertWhileToDo(MachineInstr *WLS, MachineLoop *ML); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -82,6 +83,66 @@ return nullptr; } +// Revert a WhileLoopStart to an equivalent cmp/bcc and DoLoopStart. The extra +// branch needs a new block after the preheader. ML is unused; returns true. +bool ARMBlockPlacement::revertWhileToDo(MachineInstr *WLS, MachineLoop *ML) { + // LR = t2WhileLoopStartTP r0, r1, TgtBB + // t2B Ph + // -> + // cmp r0, 0 + // t2Bcc TgtBB + // NewBlock: + // LR = t2DoLoopStartTP r0, r1 + // t2B Ph + MachineBasicBlock *Preheader = WLS->getParent(); + assert(WLS != &Preheader->back()); + assert(WLS->getNextNode() == &Preheader->back()); + MachineInstr *Br = &Preheader->back(); + assert(Br->getOpcode() == ARM::t2B); + assert(Br->getOperand(1).getImm() == 14); + + // Clear the kill flags, as the operands are still needed by the new DLS. + WLS->getOperand(1).setIsKill(false); + if (WLS->getOpcode() == ARM::t2WhileLoopStartTP) + WLS->getOperand(2).setIsKill(false); + + // Create the new block and insert it directly after the preheader. + MachineBasicBlock *NewBlock = Preheader->getParent()->CreateMachineBasicBlock( + Preheader->getBasicBlock()); + Preheader->getParent()->insert(++Preheader->getIterator(), NewBlock); + // Move the unconditional branch (Br) into it. + Br->removeFromParent(); + NewBlock->insert(NewBlock->end(), Br); + // And set up the successors: Preheader -> NewBlock -> Br's target.
+ Preheader->replaceSuccessor(Br->getOperand(0).getMBB(), NewBlock); + NewBlock->addSuccessor(Br->getOperand(0).getMBB()); + + // Create a new DLS in NewBlock, ahead of Br, to replace the WLS. + MachineInstrBuilder MIB = + BuildMI(*NewBlock, Br, WLS->getDebugLoc(), + TII->get(WLS->getOpcode() == ARM::t2WhileLoopStartTP + ? ARM::t2DoLoopStartTP + : ARM::t2DoLoopStart)); + MIB.add(WLS->getOperand(0)); + MIB.add(WLS->getOperand(1)); + if (WLS->getOpcode() == ARM::t2WhileLoopStartTP) + MIB.add(WLS->getOperand(2)); + + LLVM_DEBUG(dbgs() << DEBUG_PREFIX + << "Reverting While Loop to Do Loop: " << *WLS << "\n"); + + RevertWhileLoopStartLR(WLS, TII, ARM::t2Bcc, true); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *NewBlock); + + Preheader->getParent()->RenumberBlocks(); + BBUtils->computeAllBlockSizes(); + BBUtils->adjustBBOffsetsAfter(Preheader); + + return true; +} + /// Checks if loop has a backwards branching WLS, and if possible, fixes it. /// This requires checking the predecessor (ie. preheader or it's predecessor) /// for a WLS and if its loopExit/target is before it.
@@ -130,7 +191,7 @@ << "Can't move Predecessor" "block as it would convert a WLS from forward to a " "backwards branching WLS\n"); - return false; + return revertWhileToDo(WlsInstr, ML); } } } @@ -225,5 +286,5 @@ F->RenumberBlocks(); BBUtils->computeAllBlockSizes(); - BBUtils->adjustBBOffsetsAfter(&F->front()); + BBUtils->adjustBBOffsetsAfter(BB); } Index: llvm/test/CodeGen/Thumb2/block-placement.mir =================================================================== --- llvm/test/CodeGen/Thumb2/block-placement.mir +++ llvm/test/CodeGen/Thumb2/block-placement.mir @@ -168,24 +168,29 @@ ; CHECK: bb.1: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK: successors: %bb.3(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr - ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: successors: %bb.1(0x7c000000), %bb.4(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr - ; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK: t2CMPri renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2Bcc %bb.1, 0 /* CC::eq */, $cpsr ; CHECK: bb.4: - ; CHECK: successors: %bb.1(0x40000000), %bb.4(0x40000000) - ; CHECK: liveins: $lr, $r1, $r2 - ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $r2, $r1, $r0 + ; CHECK: $lr = t2DoLoopStart renamable $r0 + ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg ; CHECK: bb.5: - ; CHECK: successors: %bb.5(0x40000000), %bb.3(0x40000000) + ; CHECK: successors: %bb.1(0x40000000), %bb.5(0x40000000) ; CHECK: liveins: $lr, $r1, $r2 ; CHECK: renamable 
$lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr + ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg + ; CHECK: bb.6: + ; CHECK: successors: %bb.6(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $lr, $r1, $r2 + ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg bb.0: successors: %bb.2(0x80000000) Index: llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll +++ llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: cmp r0, #1 -; CHECK-NEXT: bls.w .LBB0_11 +; CHECK-NEXT: bls.w .LBB0_12 ; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader ; CHECK-NEXT: movw r5, :lower16:arr_183 ; CHECK-NEXT: movs r3, #0 @@ -31,7 +31,7 @@ ; CHECK-NEXT: @ Child Loop BB0_4 Depth 2 ; CHECK-NEXT: @ Child Loop BB0_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB0_8 Depth 2 -; CHECK-NEXT: @ Child Loop BB0_10 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_11 Depth 2 ; CHECK-NEXT: ldr.w r0, [r2, r3, lsl #2] ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: ite ne @@ -87,28 +87,26 @@ ; CHECK-NEXT: add.w r4, r0, #15 ; CHECK-NEXT: adds r3, #19 ; CHECK-NEXT: lsrs r4, r4, #4 -; CHECK-NEXT: subs.w lr, r4, #0 +; CHECK-NEXT: cmp.w r4, #0 ; CHECK-NEXT: beq .LBB0_2 -; CHECK-NEXT: b .LBB0_10 -; CHECK-NEXT: .LBB0_10: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ %bb.10: @ %land.end.us.2 +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: dlstp.8 lr, r0 +; CHECK-NEXT: .LBB0_11: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vctp.8 r0 -; CHECK-NEXT: subs r0, #16 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q3, [r3], #16 -; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: bne .LBB0_10 +; CHECK-NEXT: vstrb.8 q3, [r3], #16 +; CHECK-NEXT: letp lr, .LBB0_11 ; CHECK-NEXT: b .LBB0_2 -; 
CHECK-NEXT: .LBB0_11: +; CHECK-NEXT: .LBB0_12: ; CHECK-NEXT: movw r12, :lower16:arr_183 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: movt r12, :upper16:arr_183 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vmov.i32 q3, #0x0 -; CHECK-NEXT: b .LBB0_13 -; CHECK-NEXT: .LBB0_12: @ %for.body.lr.ph.3 -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: b .LBB0_14 +; CHECK-NEXT: .LBB0_13: @ %for.body.lr.ph.3 +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: ldr r3, [r2, #4] ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: ite ne @@ -117,18 +115,18 @@ ; CHECK-NEXT: add.w r5, r12, r3 ; CHECK-NEXT: rsb.w r3, r3, #108 ; CHECK-NEXT: add.w r4, r5, #19 -; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_13 -; CHECK-NEXT: b .LBB0_23 -; CHECK-NEXT: .LBB0_13: @ %for.cond +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_14 +; CHECK-NEXT: b .LBB0_24 +; CHECK-NEXT: .LBB0_14: @ %for.cond ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB0_15 Depth 2 -; CHECK-NEXT: @ Child Loop BB0_18 Depth 2 -; CHECK-NEXT: @ Child Loop BB0_21 Depth 2 -; CHECK-NEXT: @ Child Loop BB0_23 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_16 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_19 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_22 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_24 Depth 2 ; CHECK-NEXT: cmp r0, #2 -; CHECK-NEXT: blo .LBB0_16 -; CHECK-NEXT: @ %bb.14: @ %for.body.lr.ph -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: blo .LBB0_17 +; CHECK-NEXT: @ %bb.15: @ %for.body.lr.ph +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: ldr r3, [r2, #4] ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: ite ne @@ -137,17 +135,17 @@ ; CHECK-NEXT: add.w r5, r12, r3 ; CHECK-NEXT: rsb.w r3, r3, #108 ; CHECK-NEXT: add.w r4, r5, #19 -; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_16 -; CHECK-NEXT: .LBB0_15: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_17 +; CHECK-NEXT: .LBB0_16: @ Parent Loop BB0_14 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: 
vstrb.8 q0, [r4], #16 -; CHECK-NEXT: letp lr, .LBB0_15 -; CHECK-NEXT: .LBB0_16: @ %for.cond.backedge -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: letp lr, .LBB0_16 +; CHECK-NEXT: .LBB0_17: @ %for.cond.backedge +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: cmp r0, #2 -; CHECK-NEXT: blo .LBB0_19 -; CHECK-NEXT: @ %bb.17: @ %for.body.lr.ph.1 -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: blo .LBB0_20 +; CHECK-NEXT: @ %bb.18: @ %for.body.lr.ph.1 +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: ldr r3, [r2, #4] ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: ite ne @@ -156,17 +154,17 @@ ; CHECK-NEXT: add.w r5, r12, r3 ; CHECK-NEXT: rsb.w r3, r3, #108 ; CHECK-NEXT: add.w r4, r5, #19 -; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_19 -; CHECK-NEXT: .LBB0_18: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_20 +; CHECK-NEXT: .LBB0_19: @ Parent Loop BB0_14 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vstrb.8 q1, [r4], #16 -; CHECK-NEXT: letp lr, .LBB0_18 -; CHECK-NEXT: .LBB0_19: @ %for.cond.backedge.1 -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: letp lr, .LBB0_19 +; CHECK-NEXT: .LBB0_20: @ %for.cond.backedge.1 +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: cmp r0, #2 -; CHECK-NEXT: blo .LBB0_22 -; CHECK-NEXT: @ %bb.20: @ %for.body.lr.ph.2 -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: blo .LBB0_23 +; CHECK-NEXT: @ %bb.21: @ %for.body.lr.ph.2 +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: ldr r3, [r2, #4] ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: ite ne @@ -175,21 +173,21 @@ ; CHECK-NEXT: add.w r5, r12, r3 ; CHECK-NEXT: rsb.w r3, r3, #108 ; CHECK-NEXT: add.w r4, r5, #19 -; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_22 -; CHECK-NEXT: .LBB0_21: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_23 +; CHECK-NEXT: .LBB0_22: @ Parent Loop BB0_14 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vstrb.8 q2, 
[r4], #16 -; CHECK-NEXT: letp lr, .LBB0_21 -; CHECK-NEXT: .LBB0_22: @ %for.cond.backedge.2 -; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: letp lr, .LBB0_22 +; CHECK-NEXT: .LBB0_23: @ %for.cond.backedge.2 +; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: cmp r0, #2 -; CHECK-NEXT: blo .LBB0_13 -; CHECK-NEXT: b .LBB0_12 -; CHECK-NEXT: .LBB0_23: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: blo .LBB0_14 +; CHECK-NEXT: b .LBB0_13 +; CHECK-NEXT: .LBB0_24: @ Parent Loop BB0_14 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vstrb.8 q3, [r4], #16 -; CHECK-NEXT: letp lr, .LBB0_23 -; CHECK-NEXT: b .LBB0_13 +; CHECK-NEXT: letp lr, .LBB0_24 +; CHECK-NEXT: b .LBB0_14 entry: %cmp = icmp ugt i8 %b, 1 br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader Index: llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -475,13 +475,13 @@ ; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1 ; CHECK-NEXT: adds r4, #1 ; CHECK-NEXT: cmp.w r4, #1024 -; CHECK-NEXT: bge .LBB18_11 +; CHECK-NEXT: bge .LBB18_12 ; CHECK-NEXT: .LBB18_3: @ %loop ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB18_4 Depth 2 ; CHECK-NEXT: @ Child Loop BB18_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB18_8 Depth 2 -; CHECK-NEXT: @ Child Loop BB18_10 Depth 2 +; CHECK-NEXT: @ Child Loop BB18_11 Depth 2 ; CHECK-NEXT: movw r3, :lower16:arr_56 ; CHECK-NEXT: add.w r1, r0, #15 ; CHECK-NEXT: movt r3, :upper16:arr_56 @@ -510,20 +510,17 @@ ; CHECK-NEXT: letp lr, .LBB18_8 ; CHECK-NEXT: .LBB18_9: @ %loop ; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1 -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: subs.w lr, r12, #0 +; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB18_2 -; CHECK-NEXT: b .LBB18_10 -; CHECK-NEXT: .LBB18_10: @ Parent Loop BB18_3 Depth=1 +; CHECK-NEXT: @ %bb.10: @ %loop +; CHECK-NEXT: @ in Loop: 
Header=BB18_3 Depth=1 +; CHECK-NEXT: dlstp.8 lr, r0 +; CHECK-NEXT: .LBB18_11: @ Parent Loop BB18_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vctp.8 r1 -; CHECK-NEXT: subs r1, #16 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r3], #16 -; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: bne .LBB18_10 +; CHECK-NEXT: vstrb.8 q0, [r3], #16 +; CHECK-NEXT: letp lr, .LBB18_11 ; CHECK-NEXT: b .LBB18_2 -; CHECK-NEXT: .LBB18_11: @ %exit +; CHECK-NEXT: .LBB18_12: @ %exit ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %b, 0