diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -541,6 +541,10 @@
   bool matchBitfieldExtractFromAnd(
       MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
+  bool matchBitfieldExtractFromShr(
+      MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Reassociate pointer calculations with G_ADD involved, to allow better
   /// addressing mode usage.
   bool matchReassocPtrAdd(MachineInstr &MI,
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -652,8 +652,15 @@
     [{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def bitfield_extract_from_shr : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+    [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
 def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
-                                            bitfield_extract_from_and]>;
+                                            bitfield_extract_from_and,
+                                            bitfield_extract_from_shr]>;
 
 def reassoc_ptradd : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_PTR_ADD):$root,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4128,6 +4128,51 @@
   return true;
 }
 
+bool CombinerHelper::matchBitfieldExtractFromShr(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  const unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+  const Register Dst = MI.getOperand(0).getReg();
+
+  const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+                                  ? TargetOpcode::G_SBFX
+                                  : TargetOpcode::G_UBFX;
+
+  // Check if the type we would use for the extract is legal.
+  LLT Ty = MRI.getType(Dst);
+  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+    return false;
+
+  Register ShlSrc;
+  int64_t ShrAmt;
+  int64_t ShlAmt;
+  const unsigned Size = Ty.getScalarSizeInBits();
+
+  // Try to match shr (shl x, c1), c2.
+  if (!mi_match(Dst, MRI,
+                m_BinOp(Opcode,
+                        m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+                        m_ICst(ShrAmt))))
+    return false;
+
+  // Make sure that the shift sizes can fit a bitfield extract.
+  if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+    return false;
+
+  // Calculate start position and width of the extract.
+  const int64_t Pos = ShrAmt - ShlAmt;
+  const int64_t Width = Size - ShrAmt;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto WidthCst = B.buildConstant(ExtractTy, Width);
+    auto PosCst = B.buildConstant(ExtractTy, Pos);
+    B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+  };
+  return true;
+}
+
 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
     MachineInstr &PtrAdd) {
   assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-shr.mir
@@ -0,0 +1,178 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can fold a G_ASHR/G_LSHR fed by a G_SHL into a G_SBFX/G_UBFX.
+
+---
+name: apply_ashr_shl_to_sbfx
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: apply_ashr_shl_to_sbfx_lower_bound
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_lower_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 30
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: apply_ashr_shl_to_sbfx_upper_bound
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_upper_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[SBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: apply_lshr_shl_to_ubfx
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: apply_lshr_shl_to_ubfx_lower_bound
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_lower_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 30
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: apply_lshr_shl_to_ubfx_upper_bound
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_upper_bound
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+    ; CHECK: $w0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 31
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_LSHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: dont_apply_pos_out_of_bounds
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: dont_apply_pos_out_of_bounds
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: $w0 = COPY [[ASHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: dont_apply_no_constant
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: dont_apply_no_constant
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: $w0 = COPY [[ASHR]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    $w0 = COPY %4(s32)
+...
+
+---
+name: dont_apply_more_than_one_use
+legalized: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: dont_apply_more_than_one_use
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SHL]], [[ASHR]]
+    ; CHECK: $w0 = COPY [[MUL]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_SHL %0, %1
+    %4:_(s32) = G_ASHR %3, %2
+    %5:_(s32) = G_MUL %3, %4
+    $w0 = COPY %5(s32)
+...
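Note on the Pos/Width arithmetic used by the matcher above: for the logical-shift case, (x << ShlAmt) >> ShrAmt with 0 <= ShlAmt <= ShrAmt < Size keeps exactly Size - ShrAmt bits of x starting at bit ShrAmt - ShlAmt. A minimal standalone C++ sketch that cross-checks this identity exhaustively for s32 (illustration only, not part of the patch; ubfxModel is a hand-written model of G_UBFX semantics, not an LLVM API):

// Standalone sanity check for the Pos/Width derivation (illustration only,
// not part of the patch). Models G_UBFX as (x >> Pos) & ((1 << Width) - 1)
// and checks it against the shl+lshr pair for every in-bounds pair of
// shift amounts on a 32-bit type.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t shiftPair(uint32_t X, unsigned ShlAmt, unsigned ShrAmt) {
  return (X << ShlAmt) >> ShrAmt; // lshr (shl x, ShlAmt), ShrAmt
}

static uint32_t ubfxModel(uint32_t X, unsigned Pos, unsigned Width) {
  uint32_t Mask = Width == 32 ? ~0u : (1u << Width) - 1u;
  return (X >> Pos) & Mask;
}

int main() {
  const unsigned Size = 32;
  // Same bounds the matcher enforces: 0 <= ShlAmt <= ShrAmt < Size.
  for (unsigned ShlAmt = 0; ShlAmt < Size; ++ShlAmt)
    for (unsigned ShrAmt = ShlAmt; ShrAmt < Size; ++ShrAmt) {
      const unsigned Pos = ShrAmt - ShlAmt; // start bit of the field
      const unsigned Width = Size - ShrAmt; // number of bits kept
      for (uint32_t X : {0u, 0x12345678u, 0xdeadbeefu, ~0u})
        assert(shiftPair(X, ShlAmt, ShrAmt) == ubfxModel(X, Pos, Width));
    }
  return 0;
}

The G_ASHR -> G_SBFX case uses the same Pos and Width; only the fill of the bits above the field differs (sign-replicated rather than zero-filled).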
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -156,8 +156,7 @@
 ;
 ; GISEL-LABEL: test_rev_x_srl32_shift:
 ; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    lsl x8, x0, #33
-; GISEL-NEXT:    lsr x8, x8, #35
+; GISEL-NEXT:    ubfx x8, x0, #2, #29
 ; GISEL-NEXT:    rev x8, x8
 ; GISEL-NEXT:    lsr x0, x8, #32
 ; GISEL-NEXT:    ret
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -399,8 +399,7 @@
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
+; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
 ; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
@@ -1059,8 +1058,7 @@
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 9
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 11
+; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x150002
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX6-NEXT:    s_endpgm
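For readers decoding the updated AMDGPU checks: the scalar BFE immediate packs the field offset in its low bits and the field width in bits [22:16], so 0x150002 is width 21 at offset 2, matching Pos = 11 - 9 = 2 and Width = 32 - 11 = 21 for the replaced shl-by-9/lshr-by-11 pair, and 0x10000 is a width-1, offset-0 extract. A hypothetical helper making the packing explicit (encodeSBFE is an illustrative name, not an LLVM or AMDGPU API; the bit layout is the assumption here):

// Illustration only: pack an s_bfe_u32/s_bfe_i32 immediate, assuming the
// offset lives in the low bits and the width in bits [22:16].
#include <cstdint>

constexpr uint32_t encodeSBFE(uint32_t Offset, uint32_t Width) {
  return (Width << 16) | Offset;
}

static_assert(encodeSBFE(/*Offset=*/0, /*Width=*/1) == 0x10000, "");
static_assert(encodeSBFE(/*Offset=*/2, /*Width=*/21) == 0x150002, "");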