Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2953,6 +2953,15 @@
 /// to
 ///   b.<condition code>
 ///
+/// Replace an AND feeding a CBZ/CBNZ by a single TBZ/TBNZ instruction when
+/// the AND's constant operand is a power of 2.
+///
+/// Examples:
+///   and  w8, w8, #0x400
+///   cbnz w8, L1
+/// to
+///   tbnz w8, #10, L1
+///
 /// \param MI Conditional Branch
 /// \return True when the simple conditional branch is generated
 ///
@@ -3003,6 +3012,69 @@
 
   MachineInstr *DefMI = MRI->getVRegDef(VReg);
 
+  // Fold "AND Rd, Rn, #(1 << N); COPY; CBZ/CBNZ" into "TBZ/TBNZ Rn, #N".
+  // Only CBZ/CBNZ whose tested register is defined by a COPY are considered.
+  if (!IsTestAndBranch && DefMI->isCopy()) {
+    if (DefMI->getParent() != MBB)
+      return false;
+
+    // The COPY result and the AND result must have no other (non-debug) user.
+    if (!MRI->hasOneNonDBGUse(VReg))
+      return false;
+
+    unsigned AndVReg = DefMI->getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(AndVReg))
+      return false;
+
+    if (!MRI->hasOneNonDBGUse(AndVReg))
+      return false;
+
+    MachineInstr *AndDefMI = MRI->getVRegDef(AndVReg);
+    if (!AndDefMI)
+      return false;
+
+    unsigned AndOpc = AndDefMI->getOpcode();
+    if (AndOpc != AArch64::ANDWri && AndOpc != AArch64::ANDXri)
+      return false;
+    bool Is32Bit = AndOpc == AArch64::ANDWri;
+
+    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
+        AndDefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
+    if (!isPowerOf2_64(Mask))
+      return false;
+
+    unsigned NewReg = AndDefMI->getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
+      return false;
+
+    assert(!MRI->def_empty(NewReg) && "Register must be defined.");
+
+    MachineBasicBlock *TBB = MI->getOperand(1).getMBB();
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned Imm = Log2_64(Mask);
+
+    // Bit numbers below 32 can only be encoded by the W forms; the X forms
+    // cover bits 32-63.
+    unsigned Opc = (Imm < 32)
+                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
+                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
+    MachineInstr *NewMI = BuildMI(*MBB, MI, DL, get(Opc))
+                              .addReg(NewReg)
+                              .addImm(Imm)
+                              .addMBB(TBB);
+
+    // A W-form test of a 64-bit source must read its low half through sub_32;
+    // a 64-bit virtual register cannot be re-constrained to GPR32.
+    if (!Is32Bit && Imm < 32)
+      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
+
+    // NewReg is now read after the AND, so existing kill flags may be stale.
+    MRI->clearKillFlags(NewReg);
+
+    MI->eraseFromParent();
+    return true;
+  }
+
   // Look for CSINC
   if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
         DefMI->getOperand(1).getReg() == AArch64::WZR &&
Index: test/CodeGen/AArch64/aarch64-tbz.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-tbz.ll
@@ -0,0 +1,51 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: tbz {{w[0-9]+}}, #3, .LBB0_3
+; CHECK: tbz w[[REG1:[0-9]+]], #2, .LBB0_3
+; CHECK-NOT: and x{{[0-9]+}}, x[[REG1]], #0x4
+; CHECK-NOT: cbz x{{[0-9]+}}, .LBB0_3
+
+; CHECK: b
+define void @test1(i64 %A, i64 %B) {
+entry:
+  %and = and i64 %A, 4
+  %notlhs = icmp eq i64 %and, 0
+  %and.1 = and i64 %B, 8
+  %0 = icmp eq i64 %and.1, 0
+  %1 = or i1 %0, %notlhs
+  br i1 %1, label %if.end3, label %if.then2
+
+if.then2:                                         ; preds = %entry
+  tail call void @foo(i64 %A, i64 %B)
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then2, %entry
+  ret void
+}
+
+; CHECK-LABEL: test2
+; CHECK: cbz {{x[0-9]+}}, .LBB1_3
+; CHECK: tbz w[[REG1:[0-9]+]], #3, .LBB1_3
+; CHECK-NOT: and x{{[0-9]+}}, x[[REG1]], #0x8
+; CHECK-NOT: cbz x{{[0-9]+}}, .LBB1_3
+
+define void @test2(i64 %A, i64* readonly %B) {
+entry:
+  %tobool = icmp eq i64* %B, null
+  %and = and i64 %A, 8
+  %tobool1 = icmp eq i64 %and, 0
+  %or.cond = or i1 %tobool, %tobool1
+  br i1 %or.cond, label %if.end3, label %if.then2
+
+if.then2:                                         ; preds = %entry
+  %0 = load i64, i64* %B, align 4
+  tail call void @foo(i64 %A, i64 %0)
+  br label %if.end3
+
+if.end3:                                          ; preds = %entry, %if.then2
+  ret void
+}
+
+
+declare void @foo(i64, i64)
Index: test/CodeGen/AArch64/fast-isel-tbz.ll
===================================================================
--- test/CodeGen/AArch64/fast-isel-tbz.ll
+++ test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -disable-peephole -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
 ; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
 
 define i32 @icmp_eq_i8(i8 zeroext %a) {