Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -1106,6 +1106,8 @@ } virtual bool optimizeCondBranch(MachineInstr *MI) const { return false; } + virtual bool optimizeTestAndBranch(MachineInstr *MI) const { return false; } + /// Try to remove the load by folding it to a register operand at the use. /// We fold the load instructions if and only if the /// def and use are in the same BB. We only look at one load and see Index: lib/CodeGen/PeepholeOptimizer.cpp =================================================================== --- lib/CodeGen/PeepholeOptimizer.cpp +++ lib/CodeGen/PeepholeOptimizer.cpp @@ -154,6 +154,7 @@ bool optimizeSelect(MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs); bool optimizeCondBranch(MachineInstr *MI); + bool optimizeTestAndBranch(MachineInstr *MI); bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs); @@ -602,6 +603,12 @@ return TII->optimizeCondBranch(MI); } +/// \brief Check if a TBZ/TBNZ can be +/// generated instead of an AND followed by CBZ/CBNZ. +bool PeepholeOptimizer::optimizeTestAndBranch(MachineInstr *MI) { + return TII->optimizeTestAndBranch(MI); +} + /// \brief Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. 
/// When true is returned, the \p RewriteMap can be used by the client to @@ -1584,6 +1591,11 @@ continue; } + if (MI->isConditionalBranch() && optimizeTestAndBranch(MI)) { + Changed = true; + continue; + } + if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { Changed = true; continue; Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -163,6 +163,7 @@ unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const override; bool optimizeCondBranch(MachineInstr *MI) const override; + bool optimizeTestAndBranch(MachineInstr *MI) const override; /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in <Root>. All potential patterns are /// listed in the <Patterns> array. Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3033,6 +3033,106 @@ return true; } +/// \brief Replace test and branch sequence by TBZ/TBNZ instruction when +/// the test's constant operand is power of 2. +/// +/// Examples: +/// and w8, w8, #0x400 +/// cbnz w8, L1 +/// to +/// tbnz w8, #10, L1 +/// +/// \param MI Conditional Branch +/// \return True when the TBZ/TBNZ is generated +/// +bool AArch64InstrInfo::optimizeTestAndBranch(MachineInstr *MI) const { + bool Is64Bit = false; + bool IsNegativeBranch = false; + switch (MI->getOpcode()) { + default: + return false; + case AArch64::CBZW: + break; + case AArch64::CBNZW: + IsNegativeBranch = true; + break; + case AArch64::CBZX: + Is64Bit = true; + break; + case AArch64::CBNZX: + IsNegativeBranch = true; + Is64Bit = true; + break; + } + // Find Definition. 
+ assert(MI->getParent() && "Incomplete machine instruction\n"); + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + unsigned VReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VReg)) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(VReg); + + // Look for AND + if (DefMI->getParent() != MBB) return false; + + if (!DefMI->isCopy()) return false; + + if (!MRI->hasOneNonDBGUse(VReg)) return false; + + unsigned AndVReg = DefMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(AndVReg)) + return false; + + if (!MRI->hasOneNonDBGUse(AndVReg)) return false; + + MachineInstr *AndDefMI = MRI->getVRegDef(AndVReg); + if (!AndDefMI) return false; + if (!(AndDefMI->getOpcode() == AArch64::ANDWri) && + !(AndDefMI->getOpcode() == AArch64::ANDXri)) + return false; + + uint64_t Mask = AArch64_AM::decodeLogicalImmediate( + AndDefMI->getOperand(2).getImm(), + Is64Bit ? 
64 : 32); + if (!isPowerOf2_64(Mask)) + return false; + + MachineOperand &MO = AndDefMI->getOperand(1); + unsigned NewReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NewReg)) + return false; + + assert(!MRI->def_empty(NewReg) && "Register must be defined."); + + MachineBasicBlock &RefToMBB = *MBB; + MachineBasicBlock *TBB = MI->getOperand(1).getMBB(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Imm = Log2_64(Mask); + unsigned Opc; + + if (Imm > 31) { + MRI->constrainRegClass(NewReg, &AArch64::GPR64RegClass); + if (IsNegativeBranch) { + Opc = AArch64::TBNZX; + } else { + Opc = AArch64::TBZX; + } + } else { + MRI->constrainRegClass(NewReg, &AArch64::GPR32RegClass); + if (IsNegativeBranch) { + Opc = AArch64::TBNZW; + } else { + Opc = AArch64::TBZW; + } + } + BuildMI(RefToMBB, MI, DL, get(Opc)).addReg(NewReg).addImm(Imm).addMBB(TBB); + MI->eraseFromParent(); + return true; +} + std::pair<unsigned, unsigned> AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { const unsigned Mask = AArch64II::MO_FRAGMENT; Index: test/CodeGen/AArch64/aarch64-tbz.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64-tbz.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s + +; CHECK-LABEL: test1 +; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}} +; CHECK: tbz [[REG1:x[0-9]+]], #2, {{.LBB0_3}} +; CHECK-NOT: and [[REG2:x[0-9]+]], [[REG1]], #0x4 +; CHECK-NOT: cbz [[REG2]], {{.LBB0_3}} + +; CHECK: b +define void @test1(i64 %A, i64 %B) { +entry: + %and = and i64 %A, 4 + %notlhs = icmp eq i64 %and, 0 + %and.1 = and i64 %B, 8 + %0 = icmp eq i64 %and.1, 0 + %1 = or i1 %0, %notlhs + br i1 %1, label %if.end3, label %if.then2 + +if.then2: ; preds = %entry + tail call void @foo(i64 %A, i64 %B) + br label %if.end3 + +if.end3: ; preds = %if.then2, %entry + ret void +} + +; CHECK-LABEL: test2 +; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}} +; CHECK: tbz [[REG1:x[0-9]+]], #3, {{.LBB1_3}} +; CHECK-NOT: and 
[[REG2:x[0-9]+]], [[REG1]], #0x8 +; CHECK-NOT: cbz [[REG2]], {{.LBB1_3}} + +define void @test2(i64 %A, i64* readonly %B) #0 { +entry: + %tobool = icmp eq i64* %B, null + %and = and i64 %A, 8 + %tobool1 = icmp eq i64 %and, 0 + %or.cond = or i1 %tobool, %tobool1 + br i1 %or.cond, label %if.end3, label %if.then2 + +if.then2: ; preds = %entry + %0 = load i64, i64* %B, align 4 + tail call void @foo(i64 %A, i64 %0) + br label %if.end3 + +if.end3: ; preds = %entry, %if.then2 + ret void +} + + +declare void @foo(i64, i64) Index: test/CodeGen/AArch64/fast-isel-tbz.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-tbz.ll +++ test/CodeGen/AArch64/fast-isel-tbz.ll @@ -1,4 +1,4 @@ -; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -disable-peephole -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s ; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s define i32 @icmp_eq_i8(i8 zeroext %a) {