Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1040,6 +1040,7 @@
       break;
     }
     case TargetOpcode::G_ASHR:
+    case TargetOpcode::G_LSHR:
     case TargetOpcode::G_SHL: {
       TestReg = MI->getOperand(1).getReg();
       auto VRegAndVal =
@@ -1082,6 +1083,13 @@
       if (Bit >= TestRegSize)
         Bit = TestRegSize - 1;
       break;
+    case TargetOpcode::G_LSHR:
+      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
+      if ((Bit + *C) < TestRegSize) {
+        NextReg = TestReg;
+        Bit = Bit + *C;
+      }
+      break;
     case TargetOpcode::G_XOR:
       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
       // appropriate.
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -259,3 +259,144 @@
     G_BR %bb.0
   bb.1:
     RET_ReallyLR
+
+...
+---
+name:            fold_lshr
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_lshr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 4, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; %bit = 8 tests bit 3; folding the LSHR by 1 gives 3 + 1 = 4 as the test bit.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 1
+    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            fold_lshr_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_lshr_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   TBNZX %copy, 32, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; We're testing an s64, so valid test bits are 0 through 63.
+    ; 3 + 29 = 32, which is less than 64, so we can fold.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 29
+    %fold_me:gpr(s64) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            dont_fold_lshr
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_lshr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   %fold_cst:gpr32 = MOVi32imm 29
+  ; CHECK:   %fold_me:gpr32 = LSRVWr %copy, %fold_cst
+  ; CHECK:   TBNZW %fold_me, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; We're testing an s32, so valid test bits are 0 through 31.
+    ; 3 + 29 = 32, which is greater than 31, so we don't fold.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 29
+    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            lshr_negative
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: lshr_negative
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 2, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; The constant -1 is very large when read as unsigned, so 3 + (-1) wraps to 2.
+    ; A shift amount larger than the bit width makes the LSHR poison, so we can still fold.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 -1
+    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR