Index: lib/Target/AArch64/AArch64BranchRelaxation.cpp
===================================================================
--- lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -85,6 +85,7 @@
 
   MachineFunction *MF;
   const AArch64InstrInfo *TII;
+  const TargetRegisterInfo *TRI; // Set in runOnMachineFunction.
 
   bool relaxBranchInstructions();
   void scanFunction();
@@ -92,6 +93,8 @@
   void adjustBlockOffsets(MachineBasicBlock &MBB);
   bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
   bool fixupConditionalBranch(MachineInstr *MI);
+  bool fuseCompareAndBranch(MachineInstr *Compare,
+                            SmallVectorImpl<MachineInstr *> &NZCVUsers);
   void computeBlockSize(const MachineBasicBlock &MBB);
   unsigned getInstrOffset(MachineInstr *MI) const;
   void dumpBBs();
@@ -155,6 +158,13 @@
   return false;
 }
 
+static bool isNZCVLiveOut(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock *Succ : MBB.successors())
+    if (Succ->isLiveIn(AArch64::NZCV)) // Some successor expects the flags.
+      return true;
+  return false; // No successor has NZCV live-in.
+}
+
 /// scanFunction - Do the initial scan of the function, building up
 /// information about each block.
void AArch64BranchRelaxation::scanFunction() {
@@ -459,6 +469,111 @@
   return true;
 }
 
+/// fuseCompareAndBranch - Try to fold "cmp Rn, #1" followed by "b.lt" or
+/// "b.ge" into a single "cbz Rn" or "cbnz Rn".
+///
+/// For a signed value that is known to be non-negative, "Rn < 1" is the same
+/// as "Rn == 0" and "Rn >= 1" is the same as "Rn != 0".  Non-negativity is
+/// established by requiring every predecessor to enter this block through a
+/// sign-bit test (TBZ/TBNZ) of Rn.
+///
+/// \param Compare   candidate flag-setting compare.
+/// \param NZCVUsers instructions reading the NZCV value defined by
+///                  \p Compare; exactly one Bcc is required.
+/// \returns true if the branch was replaced.  \p Compare is left in place for
+/// the caller to erase.
+bool AArch64BranchRelaxation::fuseCompareAndBranch(
+    MachineInstr *Compare, SmallVectorImpl<MachineInstr *> &NZCVUsers) {
+  if (NZCVUsers.size() != 1)
+    return false;
+
+  MachineInstr *Branch = NZCVUsers[0];
+  if (Branch->getOpcode() != AArch64::Bcc)
+    return false;
+
+  // isCompare() may also cover other flag-setting instructions, so insist on
+  // an immediate SUBS and take the operand width from its opcode.
+  const unsigned CmpOpc = Compare->getOpcode();
+  if (CmpOpc != AArch64::SUBSWri && CmpOpc != AArch64::SUBSXri)
+    return false;
+  const bool Is64Bit = CmpOpc == AArch64::SUBSXri;
+
+  // Require "#1" with no left shift (a shifted immediate compares against
+  // 4096).  NOTE(review): assumes operand 3 is the shift amount of SUBS*ri.
+  if (!Compare->getOperand(1).isReg() || !Compare->getOperand(2).isImm() ||
+      Compare->getOperand(2).getImm() != 1 ||
+      !Compare->getOperand(3).isImm() || Compare->getOperand(3).getImm() != 0)
+    return false;
+
+  // The caller erases Compare, so its result must not be needed: only accept
+  // a compare that writes the zero register.
+  unsigned DstReg = Compare->getOperand(0).getReg();
+  if (DstReg != AArch64::WZR && DstReg != AArch64::XZR)
+    return false;
+
+  AArch64CC::CondCode CC =
+      static_cast<AArch64CC::CondCode>(Branch->getOperand(0).getImm());
+  if (CC != AArch64CC::LT && CC != AArch64CC::GE)
+    return false;
+
+  MachineBasicBlock &MBB = *Compare->getParent();
+  unsigned SrcReg = Compare->getOperand(1).getReg();
+  if (!MBB.isLiveIn(SrcReg))
+    return false;
+
+  // The register must still hold its block-entry value at the branch.
+  MachineBasicBlock::iterator MBBE = Branch;
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBBE; ++MBBI)
+    if (MBBI->modifiesRegister(SrcReg, TRI))
+      return false;
+
+  // Every predecessor has to prove the sign bit is clear on its edge into
+  // MBB; a block without predecessors proves nothing.
+  if (MBB.pred_empty())
+    return false;
+  const unsigned TBZOpc = Is64Bit ? AArch64::TBZX : AArch64::TBZW;
+  const unsigned TBNZOpc = Is64Bit ? AArch64::TBNZX : AArch64::TBNZW;
+  const int64_t SignBit = Is64Bit ? 63 : 31;
+  for (MachineBasicBlock *PBB : MBB.predecessors()) {
+    // Find the last instruction in PBB whose first operand reads SrcReg.
+    MachineInstr *LastUse = nullptr;
+    for (MachineInstr &PI : *PBB)
+      if (PI.getNumOperands() > 0 && PI.getOperand(0).isReg() &&
+          PI.getOperand(0).isUse() && PI.getOperand(0).getReg() == SrcReg)
+        LastUse = &PI;
+    if (!LastUse)
+      return false;
+
+    const unsigned Opc = LastUse->getOpcode();
+    if ((Opc != TBZOpc && Opc != TBNZOpc) ||
+        LastUse->getOperand(1).getImm() != SignBit)
+      return false;
+
+    // TBZ jumps when the bit is clear and TBNZ when it is set, so MBB must be
+    // the target of a TBZ or must not be the target of a TBNZ.
+    // NOTE(review): a TBZ to MBB that can also fall through into MBB is not
+    // rejected here.
+    const bool TargetsMBB = LastUse->getOperand(2).getMBB() == &MBB;
+    if (TargetsMBB != (Opc == TBZOpc))
+      return false;
+  }
+
+  unsigned FusedOpcode =
+      Is64Bit ? (CC == AArch64CC::LT ? AArch64::CBZX : AArch64::CBNZX)
+              : (CC == AArch64CC::LT ? AArch64::CBZW : AArch64::CBNZW);
+
+  // NOTE(review): the implicit NZCV def is kept from the original patch;
+  // CBZ/CBNZ do not set flags, so confirm whether it is needed.
+  BuildMI(MBB, Branch, Branch->getDebugLoc(), TII->get(FusedOpcode))
+      .addReg(SrcReg)
+      .addOperand(Branch->getOperand(1))
+      .addReg(AArch64::NZCV, RegState::ImplicitDefine);
+
+  Branch->eraseFromParent();
+  MBB.updateTerminator();
+  return true;
+}
+
 bool AArch64BranchRelaxation::relaxBranchInstructions() {
   bool Changed = false;
   // Relaxing branches involves creating new basic blocks, so re-eval
@@ -472,6 +587,35 @@
       ++NumRelaxed;
       Changed = true;
     }
+
+    // Walk the block bottom-up, collecting the readers of each NZCV
+    // definition so that a compare with a single Bcc user can be fused.
+    // The user list is only trustworthy once a definition has been seen, or
+    // from the start when no successor has NZCV live-in.
+    bool CompleteNZCVUsers = !isNZCVLiveOut(MBB);
+    SmallVector<MachineInstr *, 4> NZCVUsers;
+    MachineBasicBlock::iterator MBBI = MBB.end();
+    while (MBBI != MBB.begin()) {
+      MachineInstr *MI = --MBBI;
+      if (CompleteNZCVUsers && MI->isCompare() &&
+          fuseCompareAndBranch(MI, NZCVUsers)) {
+        ++NumRelaxed;
+        // Step off MI before erasing it so the iterator stays valid.
+        ++MBBI;
+        MI->eraseFromParent();
+        Changed = true;
+        NZCVUsers.clear();
+        continue;
+      }
+
+      if (MI->definesRegister(AArch64::NZCV)) {
+        NZCVUsers.clear();
+        CompleteNZCVUsers = true;
+      }
+
+      if (MI->readsRegister(AArch64::NZCV) && CompleteNZCVUsers)
+        NZCVUsers.push_back(MI);
+    }
   }
   return Changed;
 }
@@ -486,6 +630,7 @@
   DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
 
   TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();
+  TRI = MF->getSubtarget().getRegisterInfo();
 
   // Renumber all of the machine basic blocks in the function, guaranteeing that
   // the numbers agree with the position of the block in the function.
Index: test/CodeGen/AArch64/branch-relax-fuse-cbz.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/branch-relax-fuse-cbz.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=aarch64-linux--gnu -o - %s | FileCheck %s
+; Check that "cmp Xn, #1; b.lt" is emitted as "cbz Xn" when it follows a
+; "tbnz Xn, #63" on the same register, and that no cmp/b.lt pair remains.
+
+%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 }
+%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 }
+%struct.basket = type { %struct.arc*, i64, i64 }
+
+; Function Attrs: nounwind
+define void @primal_bea_mpp() {
+; CHECK-LABEL: primal_bea_mpp:
+; CHECK: tbnz [[REG:x[0-9]+]], #63, .LBB{{[0-9]+_[0-9]+}}
+; CHECK: cbz [[REG]], .[[TRUE:LBB[0-9]+_[0-9]+]]
+; CHECK-NOT: cmp [[REG]], #1
+; CHECK-NOT: b.lt .[[TRUE]]
+
+entry:
+  br label %for.body5
+
+for.body5:                                        ; preds = %if.then16, %lor.lhs.false, %land.lhs.true, %entry
+  %0 = load %struct.arc*, %struct.arc** undef, align 8
+  %cmp10 = icmp slt i64 undef, 0
+  br i1 %cmp10, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %for.body5
+  br i1 undef, label %if.then16, label %for.body5
+
+lor.lhs.false:                                    ; preds = %for.body5
+  %cmp12 = icmp sgt i64 undef, 0
+  %cmp12.not = xor i1 %cmp12, true
+  %brmerge = or i1 %cmp12.not, false
+  br i1 %brmerge, label %for.body5, label %if.then16
+
+if.then16:                                        ; preds = %lor.lhs.false, %land.lhs.true
+  %a19 = getelementptr inbounds %struct.basket, %struct.basket* undef, i64 0, i32 0
+  store %struct.arc* %0, %struct.arc** %a19, align 8
+  br label %for.body5
+}