Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -988,6 +988,43 @@
   }
 }
 
+/// Return a register which can be used as a bit to test in a TB(N)Z.
+///
+/// Starting at \p Reg, repeatedly look through a defining G_ANYEXT or G_ZEXT
+/// (found via getDefIgnoringCopies) while the extend's source register has a
+/// single use and bit \p Bit lies inside that source. The walk stops at the
+/// first definition that does not qualify.
+///
+/// \param Reg The register whose bit \p Bit is going to be tested.
+/// \param MRI Used to look up definitions, use counts and register types.
+/// \param Bit The bit number to be tested. The default of 0 lies inside every
+/// source, so callers that omit it keep the unrestricted walk.
+/// \returns \p Reg itself, or the register reached by the walk above.
+static Register getTestBitReg(Register Reg, MachineRegisterInfo &MRI,
+                              uint64_t Bit = 0) {
+  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
+    unsigned Opc = MI->getOpcode();
+    Register NextReg;
+
+    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT)
+      NextReg = MI->getOperand(1).getReg();
+
+    // Did we find something worth folding?
+    if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
+      break;
+
+    // Bits at or above the source width are created by the extend itself, so
+    // they cannot be read from the narrower register.
+    if (Bit >= MRI.getType(NextReg).getSizeInBits())
+      break;
+
+    // NextReg is worth folding. Keep looking.
+    Reg = NextReg;
+  }
+  return Reg;
+}
+
 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
     MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
     MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
@@ -1016,7 +1053,6 @@
     return false;
 
   MachineRegisterInfo &MRI = *MIB.getMRI();
-  Register TestReg = AndInst->getOperand(1).getReg();
 
   // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
   // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
@@ -1032,7 +1068,11 @@
       getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
   if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
     return false;
+
+  // Try to optimize the TB(N)Z.
   uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+  Register TestReg = AndInst->getOperand(1).getReg();
+  TestReg = getTestBitReg(TestReg, MRI, Bit);
 
   // Choose the correct TB(N)Z opcode to use.
   unsigned Opc = 0;
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -0,0 +1,146 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that we can continue matching when we are in a situation where we will
+# emit a TB(N)Z.
+...
+# The G_AND input is a G_ZEXT of %copy. The checks expect the TBNZW to test
+# bit 3 of %copy directly, with no instruction selected for the G_ZEXT.
+---
+name: fold_zext
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: fold_zext
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %fold_me:gpr(s64) = G_ZEXT %copy(s32)
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+# Same as fold_zext, but the extend is a G_ANYEXT.
+---
+name: fold_anyext
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: fold_anyext
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %fold_me:gpr(s64) = G_ANYEXT %copy(s32)
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+# %copy reaches the G_AND through a G_ZEXT (s16 -> s32) followed by a G_ANYEXT
+# (s32 -> s64). The checks expect the TBNZW to test a copy of %copy, so both
+# extends are looked through.
+---
+name: fold_multiple
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: fold_multiple
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $h0
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+  ; CHECK:   %copy:gpr32all = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY %copy
+  ; CHECK:   TBNZW [[COPY]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $h0
+    %copy:gpr(s16) = COPY $h0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %ext1:gpr(s32) = G_ZEXT %copy(s16)
+    %ext2:gpr(s64) = G_ANYEXT %ext1(s32)
+    %and:gpr(s64) = G_AND %ext2, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+# %zext is used by the G_AND and by the COPY into $x0 in bb.1, so it is still
+# selected (UBFMXri).
+# NOTE(review): despite the test name, the checks expect the TBNZW to test
+# %copy rather than %zext -- confirm that this is the intended behaviour.
+---
+name: dont_fold_more_than_one_use
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_more_than_one_use
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
+  ; CHECK:   %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   $x0 = COPY %zext
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %zext:gpr(s64) = G_ZEXT %copy(s32)
+    %and:gpr(s64) = G_AND %zext, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    $x0 = COPY %zext:gpr(s64)
+    RET_ReallyLR implicit $x0