Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -491,8 +491,6 @@
   /// bswap.
   bool matchLoadOrCombine(MachineInstr &MI,
                           std::function<void(MachineIRBuilder &)> &MatchInfo);
-  bool applyLoadOrCombine(MachineInstr &MI,
-                          std::function<void(MachineIRBuilder &)> &MatchInfo);
 
   bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
   bool applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
@@ -507,6 +505,10 @@
       MachineInstr &MI,
       SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
 
+  /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  bool applyBuildFn(MachineInstr &MI,
+                    std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -114,6 +114,8 @@
 class GIApplyKindWithArgs;
 
 def register_matchinfo: GIDefMatchData<"Register">;
+def build_fn_matchinfo :
+GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
 
 def copy_prop : GICombineRule<
   (defs root:$d),
@@ -560,13 +562,11 @@
     [{ return Helper.matchCombineInsertVecElts(*${root}, ${info}); }]),
   (apply [{ return Helper.applyCombineInsertVecElts(*${root}, ${info}); }])>;
 
-def load_or_combine_matchdata :
-GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
 def load_or_combine : GICombineRule<
-  (defs root:$root, load_or_combine_matchdata:$info),
+  (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_OR):$root,
          [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
-  (apply [{ return Helper.applyLoadOrCombine(*${root}, ${info}); }])>;
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
 
 def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
 def extend_through_phis : GICombineRule<
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3862,7 +3862,7 @@
   MI.eraseFromParent();
 }
 
-bool CombinerHelper::applyLoadOrCombine(
+bool CombinerHelper::applyBuildFn(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   Builder.setInstrAndDebugLoc(MI);
   MatchInfo(Builder);
Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -154,6 +154,14 @@
 
 def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
 
+def bitfield_extract_from_sext_inreg : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_SEXT_INREG):$root,
+         [{ return matchBitfieldExtractFromSExtInReg(*${root}, MRI, ${info}); }]),
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg]>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -172,6 +180,7 @@
                        hoist_logic_op_with_same_opcode_hands,
                        redundant_and, xor_of_and_with_same_reg,
                        extractvecelt_pairwise_add, redundant_or,
-                       mul_const, redundant_sext_inreg]> {
+                       mul_const, redundant_sext_inreg,
+                       form_bitfield_extract]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -36,6 +37,7 @@
 #define DEBUG_TYPE "aarch64-postlegalizer-combiner"
 
 using namespace llvm;
+using namespace MIPatternMatch;
 
 /// This combine tries do what performExtractVectorEltCombine does in SDAG.
 /// Rewrite for pairwise fadd pattern
@@ -238,6 +240,34 @@
   return true;
 }
 
+/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
+static bool matchBitfieldExtractFromSExtInReg(
+    MachineInstr &MI, MachineRegisterInfo &MRI,
+    std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  int64_t Width = MI.getOperand(2).getImm();
+  LLT Ty = MRI.getType(Src);
+  assert((Ty == LLT::scalar(32) || Ty == LLT::scalar(64)) &&
+         "Unexpected type for G_SEXT_INREG?");
+  Register ShiftSrc;
+  int64_t ShiftImm;
+  if (!mi_match(
+          Src, MRI,
+          m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
+                                  m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
+    return false;
+  if (ShiftImm < 0 || ShiftImm + Width > Ty.getSizeInBits())
+    return false;
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto Cst1 = B.buildConstant(Ty, ShiftImm);
+    auto Cst2 = B.buildConstant(Ty, ShiftImm + Width - 1);
+    B.buildInstr(TargetOpcode::G_SBFX, {Dst}, {ShiftSrc, Cst1, Cst2});
+  };
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
Index: llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we can fold a G_SEXT_INREG fed by a G_ASHR/G_LSHR into a G_SBFX.
+
+...
+---
+name: sextinreg_ashr_to_sbfx
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sextinreg_ashr_to_sbfx
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, [[C]], [[C1]]
+    ; CHECK: $w0 = COPY %sext_inreg(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 5
+    %shift:_(s32) = G_ASHR %x, %lsb
+    %sext_inreg:_(s32) = G_SEXT_INREG %shift, 10
+    $w0 = COPY %sext_inreg
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: sextinreg_lshr_to_sbfx
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: sextinreg_lshr_to_sbfx
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, [[C]], [[C1]]
+    ; CHECK: $w0 = COPY %sext_inreg(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 5
+    %shift:_(s32) = G_LSHR %x, %lsb
+    %sext_inreg:_(s32) = G_SEXT_INREG %shift, 10
+    $w0 = COPY %sext_inreg
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_no_constant
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; AArch64 needs a constant on the shift for this combine.
+
+    ; CHECK-LABEL: name: dont_apply_no_constant
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w0
+    ; CHECK: %shift:_(s32) = G_LSHR %x, %y(s32)
+    ; CHECK: %sext_inreg:_(s32) = G_SEXT_INREG %shift, 10
+    ; CHECK: $w0 = COPY %sext_inreg(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w0
+    %shift:_(s32) = G_LSHR %x, %y
+    %sext_inreg:_(s32) = G_SEXT_INREG %shift, 10
+    $w0 = COPY %sext_inreg
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_shift_imm_too_large
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $w0
+
+    ; LSB must be in 0-31.
+
+    ; CHECK-LABEL: name: dont_apply_shift_imm_too_large
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 32
+    ; CHECK: %shift:_(s32) = G_ASHR %x, %lsb(s32)
+    ; CHECK: %sext_inreg:_(s32) = G_SEXT_INREG %shift, 1
+    ; CHECK: $w0 = COPY %sext_inreg(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 32
+    %shift:_(s32) = G_ASHR %x, %lsb
+    %sext_inreg:_(s32) = G_SEXT_INREG %shift, 1
+    $w0 = COPY %sext_inreg
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_negative_shift_imm
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $w0
+
+    ; LSB must be in 0-31.
+
+    ; CHECK-LABEL: name: dont_apply_negative_shift_imm
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: %shift:_(s32) = G_ASHR %x, %lsb(s32)
+    ; CHECK: %sext_inreg:_(s32) = G_SEXT_INREG %shift, 1
+    ; CHECK: $w0 = COPY %sext_inreg(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 -1
+    %shift:_(s32) = G_ASHR %x, %lsb
+    %sext_inreg:_(s32) = G_SEXT_INREG %shift, 1
+    $w0 = COPY %sext_inreg
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_more_than_one_use
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: dont_apply_more_than_one_use
+    ; CHECK: liveins: $w0
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 1
+    ; CHECK: %shift:_(s32) = G_ASHR %x, %lsb(s32)
+    ; CHECK: %sext_inreg:_(s32) = G_SEXT_INREG %shift, 1
+    ; CHECK: %mul:_(s32) = G_MUL %shift, %sext_inreg
+    ; CHECK: $w0 = COPY %mul(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %lsb:_(s32) = G_CONSTANT i32 1
+    %shift:_(s32) = G_ASHR %x, %lsb
+    %sext_inreg:_(s32) = G_SEXT_INREG %shift, 1
+    %mul:_(s32) = G_MUL %shift, %sext_inreg
+    $w0 = COPY %mul
+    RET_ReallyLR implicit $w0
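
For reference, the source-level pattern this combine targets is a right shift
whose result is sign-extended from a narrower width. A minimal C++ sketch is
below (illustrative only, not part of the patch; the function name is made up,
and the exact output depends on the surrounding combines). The shl/ashr pair
is the usual way this sign-extension is spelled, and it becomes G_SEXT_INREG
during combining, so with this patch the whole sequence should select to a
single "sbfx w0, w0, #5, #10" on AArch64 instead of a shift plus a
sign-extend:

  // Illustrative example (not from the patch): extract a signed 10-bit
  // field starting at bit 5 of x. (x >> 5) gives the G_ASHR; the
  // << 22 / >> 22 pair sign-extends from 10 bits, i.e. it corresponds to
  // G_SEXT_INREG %shift, 10 after combining.
  int extract_signed_field(int x) {
    return ((x >> 5) << 22) >> 22;
  }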