diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1039,6 +1039,7 @@
     C = VRegAndVal->Value;
     break;
   }
+  case TargetOpcode::G_ASHR:
   case TargetOpcode::G_SHL: {
     TestReg = MI->getOperand(1).getReg();
     auto VRegAndVal =
@@ -1056,6 +1057,7 @@
   // We found a suitable instruction with a constant. Check to see if we can
   // walk through the instruction.
   Register NextReg;
+  unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
   switch (Opc) {
   default:
     break;
@@ -1067,11 +1069,19 @@
   case TargetOpcode::G_SHL:
     // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
     // the type of the register.
-    if (*C <= Bit && (Bit - *C) < MRI.getType(TestReg).getSizeInBits()) {
+    if (*C <= Bit && (Bit - *C) < TestRegSize) {
       NextReg = TestReg;
       Bit = Bit - *C;
     }
     break;
+  case TargetOpcode::G_ASHR:
+    // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
+    // in x
+    NextReg = TestReg;
+    Bit = Bit + *C;
+    if (Bit >= TestRegSize)
+      Bit = TestRegSize - 1;
+    break;
   case TargetOpcode::G_XOR:
     // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
     // appropriate.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -154,3 +154,108 @@
     %second_use:gpr(s64) = G_OR %shl, %bit
     $x0 = COPY %second_use
     RET_ReallyLR implicit $x0
+
+...
+---
+name:            fold_ashr_in_range
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_ashr_in_range
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64all = COPY $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 4, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; tb(n)z (ashr x, c), b == tbz(x, b + c) when b+c <= the size of the type.
+    ; In this case, we should get 1 + 3 = 4 as the bit number.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 1
+    %fold_me:gpr(s64) = G_ASHR %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            fold_ashr_msb_1
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_ashr_msb_1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 31, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; We should get a TBNZW with a 31 as the bit.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 1234
+    %fold_me:gpr(s32) = G_ASHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            fold_ashr_msb_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_ashr_msb_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   TBNZX %copy, 63, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; We should get a TBNZX with a 63 as the bit.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 1234
+    %fold_me:gpr(s64) = G_ASHR %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
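
For reference, the bit-index arithmetic the new G_ASHR case performs can be checked in isolation: testing bit b of (ashr x, c) is the same as testing bit b + c of x, clamped to the sign bit once b + c runs past the top of the type, because an arithmetic shift fills the vacated high bits with copies of the sign bit. The following is a minimal standalone C++ sketch of that rule, not part of the patch; the helper name foldTestBitThroughAshr is made up for illustration, and the asserts mirror the three MIR tests above.

    // Standalone sketch of the bit-walking rule added for G_ASHR.
    #include <cassert>
    #include <cstdint>

    // Fold the tested bit index through an arithmetic shift right by C on a
    // value that is RegSize bits wide. Bit is the index tested after the shift.
    static uint64_t foldTestBitThroughAshr(uint64_t Bit, uint64_t C,
                                           unsigned RegSize) {
      uint64_t NewBit = Bit + C;
      if (NewBit >= RegSize)
        NewBit = RegSize - 1; // Bits at or above the MSB are copies of the MSB.
      return NewBit;
    }

    int main() {
      // Mirrors fold_ashr_in_range: bit 3 of (x ashr 1) is bit 4 of x.
      assert(foldTestBitThroughAshr(3, 1, 64) == 4);
      // Mirrors fold_ashr_msb_1/2: an oversized shift clamps to the sign bit.
      assert(foldTestBitThroughAshr(3, 1234, 32) == 31);
      assert(foldTestBitThroughAshr(3, 1234, 64) == 63);
      return 0;
    }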