diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -89,6 +89,11 @@
   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
+  bool tryOptAndIntoCompareBranch(MachineInstr *AndInst,
+                                  int64_t CmpConstant,
+                                  const CmpInst::Predicate &Pred,
+                                  MachineBasicBlock *DstMBB,
+                                  MachineIRBuilder &MIB) const;
   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
@@ -983,6 +988,64 @@
   }
 }
 
+bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
+    MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
+    MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
+  // Given something like this:
+  //
+  //  %x = ...Something...
+  //  %one = G_CONSTANT i64 1
+  //  %zero = G_CONSTANT i64 0
+  //  %and = G_AND %x, %one
+  //  %cmp = G_ICMP intpred(ne), %and, %zero
+  //  %cmp_trunc = G_TRUNC %cmp
+  //  G_BRCOND %cmp_trunc, %bb.3
+  //
+  // We want to try to fold the AND into the G_BRCOND and produce either a
+  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
+  //
+  // In this case, we'd get
+  //
+  //  TBNZ %x, 0, %bb.3
+  //
+  if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
+    return false;
+
+  // Need to be comparing against 0 to fold.
+  if (CmpConstant != 0)
+    return false;
+
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  unsigned Opc = 0;
+  Register TestReg = AndInst->getOperand(1).getReg();
+  unsigned TestSize = MRI.getType(TestReg).getSizeInBits();
+
+  // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
+  // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
+  // so folding would be redundant.
+  if (Pred == CmpInst::Predicate::ICMP_EQ)
+    Opc = TestSize == 32 ? AArch64::TBZW : AArch64::TBZX;
+  else if (Pred == CmpInst::Predicate::ICMP_NE)
+    Opc = TestSize == 32 ? AArch64::TBNZW : AArch64::TBNZX;
+  else
+    return false;
+
+  // Check if the AND has a constant on its RHS that we can use as a mask.
+  // If it's a power of 2, then it's the same as checking a specific bit.
+  // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
+  auto MaybeBit =
+      getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
+  if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
+    return false;
+  uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+
+  // Construct the branch.
+  auto BranchMI =
+      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
+  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+  return true;
+}
+
 bool AArch64InstructionSelector::selectCompareBranch(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -1000,9 +1063,9 @@
   if (!VRegAndVal)
     std::swap(RHS, LHS);
 
+  MachineIRBuilder MIB(I);
   VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
   if (!VRegAndVal || VRegAndVal->Value != 0) {
-    MachineIRBuilder MIB(I);
     // If we can't select a CBZ then emit a cmp + Bcc.
     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                             CCMI->getOperand(1), MIB))
@@ -1014,11 +1077,18 @@
     return true;
   }
 
+  // Try to fold things into the branch.
+  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+                                 MIB)) {
+    I.eraseFromParent();
+    return true;
+  }
+
   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
   if (RB.getID() != AArch64::GPRRegBankID)
     return false;
-
-  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
     return false;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify that we can fold G_AND into G_BRCOND when all of the following hold:
+# 1. We have an ne/eq G_ICMP feeding into the G_BRCOND
+# 2. The G_ICMP is being compared against 0
+# 3. The G_AND has a power-of-2 constant on its RHS
+#
+# If all of these hold, we should produce a TBNZ or a TBZ.
+...
+---
+name: tbnz_and_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: tbnz_and_s64
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK: TBNZX [[COPY]], 3, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8 ; Power of 2 => TBNZX tests bit 3
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: tbz_and_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: tbz_and_s64
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $x0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK: TBZX [[COPY]], 4, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 16 ; Power of 2 => TBZX tests bit 4
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(eq), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: tbnz_and_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: tbnz_and_s32
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK: TBNZW [[COPY]], 0, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZW tests bit 0
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
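+---
+# Illustrative sketch, not autogenerated and not part of the original patch:
+# the mask is found via getConstantVRegValWithLookThrough, so a power of 2
+# hidden behind a COPY should still fold into a TBNZ. The test name and the
+# hand-written CHECK line are assumptions.
+name: tbnz_and_s64_look_through_copy
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: tbnz_and_s64_look_through_copy
+  ; CHECK: TBNZX {{%[0-9]+}}, 3, %bb.1
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %6:gpr(s64) = G_CONSTANT i64 8
+    %1:gpr(s64) = COPY %6(s64) ; Power of 2 behind a COPY => still bit 3
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...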
+---
+name: tbz_and_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: tbz_and_s32
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK: TBZW [[COPY]], 0, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBZW tests bit 0
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(eq), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: dont_fold_and_lt
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_and_lt
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
+  ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+  ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(slt), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: dont_fold_and_gt
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_and_gt
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
+  ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+  ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(sgt), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: dont_fold_and_not_power_of_2
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_and_not_power_of_2
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4098
+  ; CHECK: CBNZX [[ANDXri]], %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 7 ; Not a power of 2 => no TBNZ fold
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
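+---
+# Illustrative sketch, not autogenerated and not part of the original patch:
+# Log2_64 of a 64-bit power of 2 is the bit index itself, so a high mask such
+# as 1 << 33 should still become a TBNZX rather than a CBNZX. The test name
+# and the hand-written CHECK line are assumptions.
+name: tbnz_and_s64_high_bit
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: tbnz_and_s64_high_bit
+  ; CHECK: TBNZX {{%[0-9]+}}, 33, %bb.1
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8589934592 ; 1 << 33 => TBNZX tests bit 33
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...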
+---
+name: dont_fold_cmp_not_0
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_cmp_not_0
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 8064
+  ; CHECK: $xzr = SUBSXri [[ANDXri]], 4, 0, implicit-def $nzcv
+  ; CHECK: Bcc 1, %bb.1, implicit $nzcv
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %3:gpr(s64) = G_CONSTANT i64 4 ; Comparing against 4, not 0 => no fold
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
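+---
+# Illustrative sketch, not autogenerated and not part of the original patch:
+# tryOptAndIntoCompareBranch only looks for the constant on the RHS of the
+# G_AND (operand 2), so a mask on the LHS is not folded and we fall back to
+# a CBNZX on the AND result. The test name and the hand-written CHECK lines
+# are assumptions.
+name: dont_fold_mask_on_lhs
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_mask_on_lhs
+  ; CHECK-NOT: TBNZX
+  ; CHECK: CBNZX
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %1, %0 ; Mask on the LHS => no TBNZ fold
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...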