Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2977,6 +2977,15 @@ /// to /// b. /// +/// \brief Replace an AND-immediate plus compare-and-branch sequence by a TBZ/TBNZ instruction when +/// the AND's immediate operand is a power of 2. +/// +/// Examples: +/// and w8, w8, #0x400 +/// cbnz w8, L1 +/// to +/// tbnz w8, #10, L1 +/// /// \param MI Conditional Branch /// \return True when the simple conditional branch is generated /// @@ -3027,34 +3036,82 @@ MachineInstr *DefMI = MRI->getVRegDef(VReg); - // Look for CSINC - if (!(DefMI->getOpcode() == AArch64::CSINCWr && - DefMI->getOperand(1).getReg() == AArch64::WZR && - DefMI->getOperand(2).getReg() == AArch64::WZR) && - !(DefMI->getOpcode() == AArch64::CSINCXr && - DefMI->getOperand(1).getReg() == AArch64::XZR && - DefMI->getOperand(2).getReg() == AArch64::XZR)) - return false; - - if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) - return false; - - AArch64CC::CondCode CC = - (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); - bool CheckOnlyCCWrites = true; - // Convert only when the condition code is not modified between - // the CSINC and the branch. The CC may be used by other - // instructions in between. - if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo())) + // Look through COPY instructions to find definition. 
+ while (DefMI->isCopy()) { + unsigned CopyVReg = DefMI->getOperand(1).getReg(); + if (!MRI->hasOneNonDBGUse(CopyVReg)) + return false; + if (!MRI->hasOneDef(CopyVReg)) + return false; + DefMI = MRI->getVRegDef(CopyVReg); + } + + switch (DefMI->getOpcode()) { + default: return false; - MachineBasicBlock &RefToMBB = *MBB; - MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB(); - DebugLoc DL = MI->getDebugLoc(); - if (IsNegativeBranch) - CC = AArch64CC::getInvertedCondCode(CC); - BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); - MI->eraseFromParent(); - return true; + // Fold AND into a TBZ/TBNZ if its immediate operand is a power of 2. + case AArch64::ANDWri: + case AArch64::ANDXri: { + if (IsTestAndBranch) + return false; + if (DefMI->getParent() != MBB) + return false; + if (!MRI->hasOneNonDBGUse(VReg)) + return false; + + uint64_t Mask = AArch64_AM::decodeLogicalImmediate( + DefMI->getOperand(2).getImm(), + (DefMI->getOpcode() == AArch64::ANDWri) ? 32 : 64); + if (!isPowerOf2_64(Mask)) + return false; + + MachineOperand &MO = DefMI->getOperand(1); + unsigned NewReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NewReg)) + return false; + + assert(!MRI->def_empty(NewReg) && "Register must be defined."); + + MachineBasicBlock &RefToMBB = *MBB; + MachineBasicBlock *TBB = MI->getOperand(1).getMBB(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Imm = Log2_64(Mask); + unsigned Opc = (Imm < 32) + ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) + : (IsNegativeBranch ? 
AArch64::TBNZX : AArch64::TBZX); + BuildMI(RefToMBB, MI, DL, get(Opc)).addReg(NewReg).addImm(Imm).addMBB(TBB); + MI->eraseFromParent(); + return true; + } + // Look for CSINC + case AArch64::CSINCWr: + case AArch64::CSINCXr: { + if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && + DefMI->getOperand(2).getReg() == AArch64::WZR) && + !(DefMI->getOperand(1).getReg() == AArch64::XZR && + DefMI->getOperand(2).getReg() == AArch64::XZR)) + return false; + + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) + return false; + + AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); + bool CheckOnlyCCWrites = true; + // Convert only when the condition code is not modified between + // the CSINC and the branch. The CC may be used by other + // instructions in between. + if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo())) + return false; + MachineBasicBlock &RefToMBB = *MBB; + MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB(); + DebugLoc DL = MI->getDebugLoc(); + if (IsNegativeBranch) + CC = AArch64CC::getInvertedCondCode(CC); + BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); + MI->eraseFromParent(); + return true; + } + } } std::pair Index: llvm/trunk/test/CodeGen/AArch64/aarch64-tbz.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/aarch64-tbz.ll +++ llvm/trunk/test/CodeGen/AArch64/aarch64-tbz.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s + +; CHECK-LABEL: test1 +; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}} +; CHECK: tbz [[REG1:x[0-9]+]], #2, {{.LBB0_3}} +; CHECK-NOT: and [[REG2:x[0-9]+]], [[REG1]], #0x4 +; CHECK-NEXT-NOT: cbz [[REG2]], {{.LBB0_3}} + +; CHECK: b +define void @test1(i64 %A, i64 %B) { +entry: + %and = and i64 %A, 4 + %notlhs = icmp eq i64 %and, 0 + %and.1 = and i64 %B, 8 + %0 = icmp eq i64 %and.1, 0 + %1 = or i1 %0, %notlhs + br i1 %1, label %if.end3, label %if.then2 + 
+if.then2: ; preds = %entry + tail call void @foo(i64 %A, i64 %B) + br label %if.end3 + +if.end3: ; preds = %if.then2, %entry + ret void +} + +; CHECK-LABEL: test2 +; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}} +; CHECK: tbz [[REG1:x[0-9]+]], #3, {{.LBB1_3}} +; CHECK-NOT: and [REG2:x[0-9]+], [[REG1]], #0x08 +; CHECK-NEXT-NOT: cbz [[REG2]], {{.LBB1_3}} + +define void @test2(i64 %A, i64* readonly %B) #0 { +entry: + %tobool = icmp eq i64* %B, null + %and = and i64 %A, 8 + %tobool1 = icmp eq i64 %and, 0 + %or.cond = or i1 %tobool, %tobool1 + br i1 %or.cond, label %if.end3, label %if.then2 + +if.then2: ; preds = %entry + %0 = load i64, i64* %B, align 4 + tail call void @foo(i64 %A, i64 %0) + br label %if.end3 + +if.end3: ; preds = %entry, %if.then2 + ret void +} + + +declare void @foo(i64, i64) Index: llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll +++ llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll @@ -1,4 +1,4 @@ -; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -disable-peephole -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s ; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s define i32 @icmp_eq_i8(i8 zeroext %a) {