diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -1016,18 +1016,13 @@ return false; MachineRegisterInfo &MRI = *MIB.getMRI(); - unsigned Opc = 0; Register TestReg = AndInst->getOperand(1).getReg(); - unsigned TestSize = MRI.getType(TestReg).getSizeInBits(); // Only support EQ and NE. If we have LT, then it *is* possible to fold, but // we don't want to do this. When we have an AND and LT, we need a TST/ANDS, // so folding would be redundant. - if (Pred == CmpInst::Predicate::ICMP_EQ) - Opc = TestSize == 32 ? AArch64::TBZW : AArch64::TBZX; - else if (Pred == CmpInst::Predicate::ICMP_NE) - Opc = TestSize == 32 ? AArch64::TBNZW : AArch64::TBNZX; - else + if (Pred != CmpInst::Predicate::ICMP_EQ && + Pred != CmpInst::Predicate::ICMP_NE) return false; // Check if the AND has a constant on its RHS which we can use as a mask. @@ -1039,6 +1034,20 @@ return false; uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value)); + // Choose the correct TB(N)Z opcode to use. + unsigned Opc = 0; + if (Bit < 32) { + // When the bit is less than 32, we have to use a TBZW even if we're on a 64 + // bit register. + Opc = Pred == CmpInst::Predicate::ICMP_EQ ? AArch64::TBZW : AArch64::TBNZW; + TestReg = narrowExtendRegIfNeeded(TestReg, MIB); + } else { + // Same idea for when Bit >= 32. We don't have to narrow here, because if + // Bit >= 32, then the G_CONSTANT must be outside the range of valid 32-bit + // values. So, we must have a s64. + Opc = Pred == CmpInst::Predicate::ICMP_EQ ? AArch64::TBZX : AArch64::TBNZX; + } + // Construct the branch. 
auto BranchMI = MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir @@ -9,16 +9,16 @@ # If all of these hold, we should produce a tbnz or a tbz. ... --- -name: tbnz_and_s64 +name: tbnzx_and alignment: 4 legalized: true regBankSelected: true body: | - ; CHECK-LABEL: name: tbnz_and_s64 + ; CHECK-LABEL: name: tbnzx_and ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: TBNZX [[COPY]], 3, %bb.1 + ; CHECK: TBNZX [[COPY]], 33, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR @@ -26,7 +26,7 @@ successors: %bb.0, %bb.1 liveins: $x0 %0:gpr(s64) = COPY $x0 - %1:gpr(s64) = G_CONSTANT i64 8 ; Power of 2 => TBNZ uses 3 as mask + %1:gpr(s64) = G_CONSTANT i64 8589934592 ; Bit number 33 => TBNZX %3:gpr(s64) = G_CONSTANT i64 0 %2:gpr(s64) = G_AND %0, %1 %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3 @@ -38,18 +38,18 @@ ... --- -name: tbz_and_s64 +name: tbzx_and alignment: 4 legalized: true regBankSelected: true tracksRegLiveness: true body: | - ; CHECK-LABEL: name: tbz_and_s64 + ; CHECK-LABEL: name: tbzx_and ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: TBZX [[COPY]], 4, %bb.1 + ; CHECK: TBZX [[COPY]], 33, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR @@ -57,7 +57,7 @@ successors: %bb.0, %bb.1 liveins: $x0 %0:gpr(s64) = COPY $x0 - %1:gpr(s64) = G_CONSTANT i64 16 ; Power of 2 => TBNZ uses 4 as mask + %1:gpr(s64) = G_CONSTANT i64 8589934592 ; Bit number 33 => TBZX %3:gpr(s64) = G_CONSTANT i64 0 %2:gpr(s64) = G_AND %0, %1 %5:gpr(s32) = G_ICMP intpred(eq), %2(s64), %3 @@ -69,13 +69,13 @@ ... 
--- -name: tbnz_and_s32 +name: tbnzw_and alignment: 4 legalized: true regBankSelected: true tracksRegLiveness: true body: | - ; CHECK-LABEL: name: tbnz_and_s32 + ; CHECK-LABEL: name: tbnzw_and ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $w0 @@ -88,7 +88,7 @@ successors: %bb.0, %bb.1 liveins: $w0 %0:gpr(s32) = COPY $w0 - %1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZ uses 0 as mask + %1:gpr(s32) = G_CONSTANT i32 1 %3:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s32) = G_AND %0, %1 %5:gpr(s32) = G_ICMP intpred(ne), %2(s32), %3 @@ -100,13 +100,13 @@ ... --- -name: tbz_and_s32 +name: tbzw_and alignment: 4 legalized: true regBankSelected: true tracksRegLiveness: true body: | - ; CHECK-LABEL: name: tbz_and_s32 + ; CHECK-LABEL: name: tbzw_and ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $w0 @@ -119,7 +119,7 @@ successors: %bb.0, %bb.1 liveins: $w0 %0:gpr(s32) = COPY $w0 - %1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZ uses 0 as mask + %1:gpr(s32) = G_CONSTANT i32 1 %3:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s32) = G_AND %0, %1 %5:gpr(s32) = G_ICMP intpred(eq), %2(s32), %3