diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -991,7 +991,7 @@
 }
 
 /// Return a register which can be used as a bit to test in a TB(N)Z.
-static Register getTestBitReg(Register Reg, uint64_t Bit,
+static Register getTestBitReg(Register Reg, uint64_t &Bit,
                               MachineRegisterInfo &MRI) {
   assert(Reg.isValid() && "Expected valid register!");
   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
@@ -1031,6 +1031,15 @@
       }
       if (VRegAndVal)
         C = VRegAndVal->Value;
+      break;
+    }
+    case TargetOpcode::G_SHL: {
+      TestReg = MI->getOperand(1).getReg();
+      auto VRegAndVal =
+          getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+      if (VRegAndVal)
+        C = VRegAndVal->Value;
+      break;
+    }
     }
@@ -1049,6 +1058,14 @@
       if ((*C >> Bit) & 1)
         NextReg = TestReg;
       break;
+    case TargetOpcode::G_SHL:
+      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
+      // the type of the register.
+      if (*C <= Bit && (Bit - *C) < MRI.getType(TestReg).getSizeInBits()) {
+        NextReg = TestReg;
+        Bit = Bit - *C;
+      }
+      break;
     }
 
     // Check if we found anything worth folding.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check folding a G_SHL into a G_BRCOND which has been matched as a TB(N)Z.
+...
+---
+name:            fold_shl
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_shl
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64all = COPY $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 2, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; tbnz (shl x, 1), 3 == tbnz x, 2
+    %fold_cst:gpr(s64) = G_CONSTANT i64 1
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_shl_1
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_shl_1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_me:gpr64 = UBFMXri %copy, 59, 58
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; 5 > 3, so we cannot do the transformation as above.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 5
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_shl_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_shl_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_cst:gpr64 = MOVi64imm -5
+  ; CHECK:   %fold_me:gpr64 = LSLVXr %copy, %fold_cst
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; Same case as above, except we wrap around.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 -5
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
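
Side note on the transformation itself (not part of the patch): the guard "*C <= Bit && (Bit - *C) < MRI.getType(TestReg).getSizeInBits()" encodes the identity that bit b of (x << c) equals bit (b - c) of x whenever c <= b and b - c still indexes a bit of the original register. Below is a minimal standalone C++ sketch of that identity; testBit is a made-up helper that only mirrors the condition TB(N)Z checks ("is bit B of V set?") and is not an LLVM API.

// Standalone illustration of the bit identity behind the G_SHL fold.
// Assumes nothing from LLVM; testBit is hypothetical.
#include <cassert>
#include <cstdint>

static bool testBit(uint64_t V, uint64_t B) { return (V >> B) & 1; }

int main() {
  const uint64_t Width = 64; // size of the register type in bits
  const uint64_t Values[] = {0, 1, 0x8, 0xdeadbeefULL, ~0ULL};
  for (uint64_t X : Values) {
    for (uint64_t C = 0; C < Width; ++C) {   // shift amount
      for (uint64_t B = 0; B < Width; ++B) { // bit being tested
        // Same guard as the patch: only fold when b - c is non-negative and
        // still within the register width.
        if (C <= B && (B - C) < Width)
          assert(testBit(X << C, B) == testBit(X, B - C));
      }
    }
  }
  return 0;
}

This is only meant to show why b - c is the right bit to test after walking through the G_SHL; shift amounts the guard rejects (such as the shift-by-5 and shift-by--5 cases in the tests above) are exactly the ones where the fold is skipped.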