diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -1222,38 +1222,52 @@ Register LHS = CCMI->getOperand(2).getReg(); Register RHS = CCMI->getOperand(3).getReg(); auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - if (!VRegAndVal) + if (!VRegAndVal) { std::swap(RHS, LHS); + VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); + } MachineIRBuilder MIB(I); - VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); + const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); + MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI); + unsigned LHSOpc = LHSMI->getOpcode(); + + // When we have a greater-than comparison, we can just test if the msb is + // zero. + // + // Note that we don't want to do this when we have a G_AND because it can + // become a tst. The tst will make the test bit in the TB(N)Z redundant. + if (VRegAndVal && VRegAndVal->Value == -1 && Pred == CmpInst::ICMP_SGT && + LHSOpc != TargetOpcode::G_AND) { + uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; + emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); + I.eraseFromParent(); + return true; + } + if (!VRegAndVal || VRegAndVal->Value != 0) { // If we can't select a CBZ then emit a cmp + Bcc. if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB)) return false; - const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( - (CmpInst::Predicate)CCMI->getOperand(1).getPredicate()); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred); MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); I.eraseFromParent(); return true; } - // Try to fold things into the branch. 
- const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); - MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI); if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB, MIB)) { I.eraseFromParent(); return true; } - // When we have a less than comparison, we can just test if the last bit - // is not zero. + // When we have a less than comparison, we can just test if the msb is not + // zero. // // Note that we don't want to do this when we have a G_AND because it can // become a tst. The tst will make the test bit in the TB(N)Z redundant. - if (Pred == CmpInst::ICMP_SLT && LHSMI->getOpcode() != TargetOpcode::G_AND) { + if (Pred == CmpInst::ICMP_SLT && LHSOpc != TargetOpcode::G_AND) { uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); I.eraseFromParent(); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir @@ -0,0 +1,122 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that we can produce a tbz when we have a sgt compare against -1. +# +# The bit tested should be the size of the test register minus 1. +# + +... 
+---
+name: tbzx_sgt # s64 case: branch on "x sgt -1"; the expected output tests bit 63 with TBZX.
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: tbzx_sgt
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: %copy:gpr64 = COPY $x0
+  ; CHECK: TBZX %copy, 63, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %negative_one:gpr(s64) = G_CONSTANT i64 -1
+    %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s64), %negative_one ; signed x > -1 holds exactly when the top bit of x is clear
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1 ; this conditional branch is what the expected output replaces with the test-bit branch
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: tbzw_sgt # s32 case: branch on "x sgt -1"; the expected output tests bit 31 with TBZW.
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: tbzw_sgt
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: %copy:gpr32 = COPY $w0
+  ; CHECK: TBZW %copy, 31, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0 ; NOTE(review): liveins names $x0 while the copy below reads $w0 -- presumably intentional, confirm
+    %copy:gpr(s32) = COPY $w0
+    %negative_one:gpr(s32) = G_CONSTANT i32 -1
+    %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s32), %negative_one ; same compare as the s64 test, on a 32-bit value
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: no_tbz_not_negative_one # Negative test: the constant is 1, not -1, so the expected output is SUBSWri + Bcc rather than a test-bit branch.
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: no_tbz_not_negative_one
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: %copy:gpr32sp = COPY $w0
+  ; CHECK: $wzr = SUBSWri %copy, 1, 0, implicit-def $nzcv
+  ; CHECK: Bcc 12, %bb.1, implicit $nzcv
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %one:gpr(s32) = G_CONSTANT i32 1 ; not -1, so the sgt test-bit fold should not apply here
+    %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s32), %one
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name: dont_fold_and # Negative test: the compared value comes from a G_AND, so the expected output keeps AND + compare + Bcc instead of a test-bit branch.
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+  ; CHECK-LABEL: name: dont_fold_and
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: %copy:gpr64 = COPY $x0
+  ; CHECK: %negative_one:gpr64 = MOVi64imm -1
+  ; CHECK: %and:gpr64common = ANDXri %copy, 8000
+  ; CHECK: $xzr = SUBSXrr %and, %negative_one, implicit-def $nzcv
+  ; CHECK: Bcc 12, %bb.1, implicit $nzcv
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8 ; mask operand of the G_AND below; NOTE(review): the 8000 in the expected ANDXri is presumably the encoded form of this 8 -- confirm
+    %negative_one:gpr(s64) = G_CONSTANT i64 -1
+    %c:gpr(s64) = G_CONSTANT i64 8 ; not referenced by any other instruction in this function
+    %and:gpr(s64) = G_AND %copy, %bit ; the G_AND defining the compare's LHS is what should block the test-bit fold
+    %cmp:gpr(s32) = G_ICMP intpred(sgt), %and(s64), %negative_one ; same "sgt -1" shape as the positive tests, but on the G_AND result
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR