diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -257,6 +257,11 @@
                             MachineBasicBlock *DstMBB,
                             MachineIRBuilder &MIB) const;
 
+  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+                        MachineBasicBlock *DestMBB,
+                        MachineIRBuilder &MIB) const;
+
   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
   // We use these manually instead of using the importer since it doesn't
   // support SDNodeXForm.
@@ -1394,9 +1399,7 @@
   // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
   // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
   // so folding would be redundant.
-  if (Pred != CmpInst::Predicate::ICMP_EQ &&
-      Pred != CmpInst::Predicate::ICMP_NE)
-    return false;
+  assert(ICmpInst::isEquality(Pred) && "Expected only eq/ne?");
 
   // Check if the AND has a constant on its RHS which we can use as a mask.
   // If it's a power of 2, then it's the same as checking a specific bit.
@@ -1415,6 +1418,27 @@
   return true;
 }
 
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+                                                  bool IsNegative,
+                                                  MachineBasicBlock *DestMBB,
+                                                  MachineIRBuilder &MIB) const {
+  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+             AArch64::GPRRegBankID &&
+         "Expected GPRs only?");
+  auto Ty = MRI.getType(CompareReg);
+  unsigned Width = Ty.getSizeInBits();
+  assert(!Ty.isVector() && "Expected scalar only?");
+  assert(Width <= 64 && "Expected width to be at most 64?");
+  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+                                          {AArch64::CBNZW, AArch64::CBNZX}};
+  unsigned Opc = OpcTable[IsNegative][Width == 64];
+  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+  return &*BranchMI;
+}
+
 bool AArch64InstructionSelector::selectCompareBranch(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -1477,51 +1501,39 @@
     }
   }
 
-  if (!VRegAndVal) {
-    std::swap(RHS, LHS);
-    VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
-    LHSMI = getDefIgnoringCopies(LHS, MRI);
-  }
+  // Attempt to handle commutative condition codes. Right now, that's only
+  // eq/ne.
+  if (ICmpInst::isEquality(Pred)) {
+    if (!VRegAndVal) {
+      std::swap(RHS, LHS);
+      VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+      LHSMI = getDefIgnoringCopies(LHS, MRI);
+    }
 
-  if (!VRegAndVal || VRegAndVal->Value != 0) {
-    // If we can't select a CBZ then emit a cmp + Bcc.
-    auto Pred =
-        static_cast<CmpInst::Predicate>(CCMI->getOperand(1).getPredicate());
-    emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
-                       CCMI->getOperand(1), MIB);
-    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
-    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
-    I.eraseFromParent();
-    return true;
-  }
+    if (VRegAndVal && VRegAndVal->Value == 0) {
+      // If there's a G_AND feeding into this branch, try to fold it away by
+      // emitting a TB(N)Z instead.
+      if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+                                     MIB)) {
+        I.eraseFromParent();
+        return true;
+      }
 
-  // Try to emit a TB(N)Z for an eq or ne condition.
-  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
-                                 MIB)) {
-    I.eraseFromParent();
-    return true;
+      // Otherwise, try to emit a CB(N)Z instead.
+      auto LHSTy = MRI.getType(LHS);
+      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+        I.eraseFromParent();
+        return true;
+      }
+    }
   }
 
-  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
-  if (RB.getID() != AArch64::GPRRegBankID)
-    return false;
-  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
-    return false;
-
-  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
-  unsigned CBOpc = 0;
-  if (CmpWidth <= 32)
-    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
-  else if (CmpWidth == 64)
-    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
-  else
-    return false;
-
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
-      .addUse(LHS)
-      .addMBB(DestMBB)
-      .constrainAllUses(TII, TRI, RBI);
-
+  // Couldn't optimize. Emit a compare + bcc.
+  emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
+                     CCMI->getOperand(1), MIB);
+  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
+  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
   I.eraseFromParent();
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -143,8 +143,7 @@
   ; CHECK:   liveins: $w0
   ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
   ; CHECK:   [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
-  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
-  ; CHECK:   TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK:   Bcc 11, %bb.1, implicit $nzcv
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
@@ -176,8 +175,7 @@
   ; CHECK:   liveins: $w0
   ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
   ; CHECK:   [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
-  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
-  ; CHECK:   TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK:   Bcc 12, %bb.1, implicit $nzcv
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
@@ -100,8 +100,7 @@
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   %copy:gpr64 = COPY $x0
   ; CHECK:   [[ANDSXri:%[0-9]+]]:gpr64 = ANDSXri %copy, 8000, implicit-def $nzcv
-  ; CHECK:   %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
-  ; CHECK:   TBNZW %cmp, 0, %bb.1
+  ; CHECK:   Bcc 11, %bb.1, implicit $nzcv
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
@@ -133,8 +132,7 @@
   ; CHECK:   %copy:gpr64 = COPY $x0
   ; CHECK:   %zero:gpr64 = COPY $xzr
   ; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %zero, %copy, implicit-def $nzcv
-  ; CHECK:   %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
-  ; CHECK:   TBNZW %cmp, 0, %bb.1
+  ; CHECK:   Bcc 11, %bb.1, implicit $nzcv
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR