diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -501,7 +501,7 @@ (defs root:$dst, register_matchinfo:$info), (match (G_INTTOPTR $t, $ptr), (G_PTRTOINT $dst, $t):$mi, - [{ ${info} = ${ptr}.getReg(); }]), + [{ ${info} = ${ptr}.getReg(); return true; }]), (apply [{ Helper.applyCombineP2IToI2P(*${mi}, ${info}); }]) >; @@ -615,7 +615,7 @@ (defs root:$dst, register_matchinfo:$matchinfo), (match (G_FNEG $t, $src), (G_FNEG $dst, $t):$mi, - [{ ${matchinfo} = ${src}.getReg(); }]), + [{ ${matchinfo} = ${src}.getReg(); return true; }]), (apply [{ Helper.replaceSingleDefInstWithReg(*${mi}, ${matchinfo}); }]) >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -10,31 +10,31 @@ // TODO: This really belongs after legalization after scalarization. 
-def fmin_fmax_legacy_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo">; +def fmin_fmax_legacy_matchdata : GIDefMatchData<"FMinFMaxLegacyInfo">; let Predicates = [HasFminFmaxLegacy] in def fcmp_select_to_fmin_fmax_legacy : GICombineRule< (defs root:$select, fmin_fmax_legacy_matchdata:$matchinfo), (match (wip_match_opcode G_SELECT):$select, - [{ return PostLegalizerHelper.matchFMinFMaxLegacy(*${select}, ${matchinfo}); }]), - (apply [{ PostLegalizerHelper.applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>; + [{ return matchFMinFMaxLegacy(*${select}, ${matchinfo}); }]), + (apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>; def uchar_to_float : GICombineRule< (defs root:$itofp), (match (wip_match_opcode G_UITOFP, G_SITOFP):$itofp, - [{ return PostLegalizerHelper.matchUCharToFloat(*${itofp}); }]), - (apply [{ PostLegalizerHelper.applyUCharToFloat(*${itofp}); }])>; + [{ return matchUCharToFloat(*${itofp}); }]), + (apply [{ applyUCharToFloat(*${itofp}); }])>; def rcp_sqrt_to_rsq : GICombineRule< (defs root:$rcp, build_fn_matchinfo:$matchinfo), (match (wip_match_opcode G_INTRINSIC, G_FSQRT):$rcp, - [{ return PostLegalizerHelper.matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]), + [{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>; -def cvt_f32_ubyteN_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo">; +def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">; def cvt_f32_ubyteN : GICombineRule< (defs root:$cvt_f32_ubyteN, cvt_f32_ubyteN_matchdata:$matchinfo), @@ -42,18 +42,18 @@ G_AMDGPU_CVT_F32_UBYTE1, G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3):$cvt_f32_ubyteN, - [{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]), - (apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>; + [{ return 
matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]), + (apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>; -def clamp_i64_to_i16_matchdata : GIDefMatchData<"AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo">; +def clamp_i64_to_i16_matchdata : GIDefMatchData<"ClampI64ToI16MatchInfo">; def clamp_i64_to_i16 : GICombineRule< (defs root:$clamp_i64_to_i16, clamp_i64_to_i16_matchdata:$matchinfo), (match (wip_match_opcode G_TRUNC):$clamp_i64_to_i16, - [{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]), - (apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>; + [{ return matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, MF, ${matchinfo}); }]), + (apply [{ applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>; -def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">; +def med3_matchdata : GIDefMatchData<"Med3MatchInfo">; def int_minmax_to_med3 : GICombineRule< (defs root:$min_or_max, med3_matchdata:$matchinfo), @@ -61,8 +61,8 @@ G_SMIN, G_UMAX, G_UMIN):$min_or_max, - [{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]), - (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>; + [{ return matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]), + (apply [{ applyMed3(*${min_or_max}, ${matchinfo}); }])>; def fp_minmax_to_med3 : GICombineRule< (defs root:$min_or_max, med3_matchdata:$matchinfo), @@ -70,8 +70,8 @@ G_FMINNUM, G_FMAXNUM_IEEE, G_FMINNUM_IEEE):$min_or_max, - [{ return RegBankHelper.matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]), - (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>; + [{ return matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]), + (apply [{ applyMed3(*${min_or_max}, ${matchinfo}); }])>; def fp_minmax_to_clamp : GICombineRule< (defs root:$min_or_max, register_matchinfo:$matchinfo), @@ -79,21 +79,21 @@ G_FMINNUM, G_FMAXNUM_IEEE, 
G_FMINNUM_IEEE):$min_or_max, - [{ return RegBankHelper.matchFPMinMaxToClamp(*${min_or_max}, ${matchinfo}); }]), - (apply [{ RegBankHelper.applyClamp(*${min_or_max}, ${matchinfo}); }])>; + [{ return matchFPMinMaxToClamp(*${min_or_max}, ${matchinfo}); }]), + (apply [{ applyClamp(*${min_or_max}, ${matchinfo}); }])>; def fmed3_intrinsic_to_clamp : GICombineRule< (defs root:$fmed3, register_matchinfo:$matchinfo), (match (wip_match_opcode G_AMDGPU_FMED3):$fmed3, - [{ return RegBankHelper.matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]), - (apply [{ RegBankHelper.applyClamp(*${fmed3}, ${matchinfo}); }])>; + [{ return matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]), + (apply [{ applyClamp(*${fmed3}, ${matchinfo}); }])>; def remove_fcanonicalize_matchinfo : GIDefMatchData<"Register">; def remove_fcanonicalize : GICombineRule< (defs root:$fcanonicalize, remove_fcanonicalize_matchinfo:$matchinfo), (match (wip_match_opcode G_FCANONICALIZE):$fcanonicalize, - [{ return PostLegalizerHelper.matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]), + [{ return matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]), (apply [{ Helper.replaceSingleDefInstWithReg(*${fcanonicalize}, ${matchinfo}); }])>; def foldable_fneg_matchdata : GIDefMatchData<"MachineInstr *">; @@ -109,8 +109,8 @@ def sign_extension_in_reg : GICombineRule< (defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo), (match (wip_match_opcode G_SEXT_INREG):$sign_inreg, - [{ return PostLegalizerHelper.matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]), - (apply [{ PostLegalizerHelper.applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>; + [{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]), + (apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>; let Predicates = [Has16BitInsts, NotHasMed3_16] in { @@ -139,30 +139,21 @@ // Combines which should only apply on VI def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>; -def 
AMDGPUPreLegalizerCombinerHelper: GICombinerHelper< - "AMDGPUGenPreLegalizerCombinerHelper", +def AMDGPUPreLegalizerCombiner: GICombinerHelper< + "AMDGPUPreLegalizerCombinerImpl", [all_combines, clamp_i64_to_i16, foldable_fneg]> { - let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule"; - let StateClass = "AMDGPUPreLegalizerCombinerHelperState"; - let AdditionalArguments = []; } -def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper< - "AMDGPUGenPostLegalizerCombinerHelper", +def AMDGPUPostLegalizerCombiner: GICombinerHelper< + "AMDGPUPostLegalizerCombinerImpl", [all_combines, gfx6gfx7_combines, gfx8_combines, uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg, rcp_sqrt_to_rsq, sign_extension_in_reg]> { - let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule"; - let StateClass = "AMDGPUPostLegalizerCombinerHelperState"; - let AdditionalArguments = []; } -def AMDGPURegBankCombinerHelper : GICombinerHelper< - "AMDGPUGenRegBankCombinerHelper", +def AMDGPURegBankCombiner : GICombinerHelper< + "AMDGPURegBankCombinerImpl", [unmerge_merge, unmerge_cst, unmerge_undef, zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain, fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> { - let DisableRuleOption = "amdgpuregbankcombiner-disable-rule"; - let StateClass = "AMDGPURegBankCombinerHelperState"; - let AdditionalArguments = []; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -1,4 +1,4 @@ -//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// +//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -19,6 +19,8 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineDominators.h" @@ -26,26 +28,41 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Target/TargetMachine.h" +#define GET_GICOMBINER_DEPS +#include "AMDGPUGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_DEPS + #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" using namespace llvm; using namespace MIPatternMatch; -class AMDGPUPostLegalizerCombinerHelper { +namespace { +#define GET_GICOMBINER_TYPES +#include "AMDGPUGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TYPES + +class AMDGPUPostLegalizerCombinerImpl : public GIMatchTableExecutor { protected: + const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig; + MachineIRBuilder &B; MachineFunction &MF; MachineRegisterInfo &MRI; - const GCNSubtarget &Subtarget; + const GCNSubtarget &STI; const SIInstrInfo &TII; AMDGPUCombinerHelper &Helper; + GISelChangeObserver &Observer; public: - AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, - AMDGPUCombinerHelper &Helper) - : B(B), MF(B.getMF()), MRI(*B.getMRI()), - Subtarget(MF.getSubtarget()), - TII(*Subtarget.getInstrInfo()), Helper(Helper){}; + AMDGPUPostLegalizerCombinerImpl( + const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig, + MachineIRBuilder &B, AMDGPUCombinerHelper &Helper, + GISelChangeObserver &Observer); + + static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; } + + bool tryCombineAll(MachineInstr &I) const; struct FMinFMaxLegacyInfo { Register LHS; @@ -56,15 +73,16 @@ }; // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize - bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo 
&Info); + bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const; void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, - const FMinFMaxLegacyInfo &Info); + const FMinFMaxLegacyInfo &Info) const; - bool matchUCharToFloat(MachineInstr &MI); - void applyUCharToFloat(MachineInstr &MI); + bool matchUCharToFloat(MachineInstr &MI) const; + void applyUCharToFloat(MachineInstr &MI) const; - bool matchRcpSqrtToRsq(MachineInstr &MI, - std::function &MatchInfo); + bool + matchRcpSqrtToRsq(MachineInstr &MI, + std::function &MatchInfo) const; // FIXME: Should be able to have 2 separate matchdatas rather than custom // struct boilerplate. @@ -73,20 +91,53 @@ unsigned ShiftOffset; }; - bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo); + bool matchCvtF32UByteN(MachineInstr &MI, + CvtF32UByteMatchInfo &MatchInfo) const; void applyCvtF32UByteN(MachineInstr &MI, - const CvtF32UByteMatchInfo &MatchInfo); + const CvtF32UByteMatchInfo &MatchInfo) const; - bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg); + bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const; // Combine unsigned buffer load and signed extension instructions to generate - // signed buffer laod instructions. + // signed buffer load instructions. 
- bool matchCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo); - void applyCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo); + bool matchCombineSignExtendInReg(MachineInstr &MI, + MachineInstr *&MatchInfo) const; + void applyCombineSignExtendInReg(MachineInstr &MI, + MachineInstr *&MatchInfo) const; + +private: +#define GET_GICOMBINER_PREDICATES_DECL +#define AMDGPUSubtarget GCNSubtarget +#include "AMDGPUGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_PREDICATES_DECL +#undef AMDGPUSubtarget + +#define GET_GICOMBINER_TEMPORARIES_DECL +#include "AMDGPUGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TEMPORARIES_DECL }; -bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy( - MachineInstr &MI, FMinFMaxLegacyInfo &Info) { +#define GET_GICOMBINER_IMPL +#define AMDGPUSubtarget GCNSubtarget +#include "AMDGPUGenPostLegalizeGICombiner.inc" +#undef AMDGPUSubtarget +#undef GET_GICOMBINER_IMPL + +AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl( + const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig, + MachineIRBuilder &B, AMDGPUCombinerHelper &Helper, + GISelChangeObserver &Observer) + : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()), + STI(MF.getSubtarget()), TII(*STI.getInstrInfo()), + Helper(Helper), Observer(Observer), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "AMDGPUGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy( + MachineInstr &MI, FMinFMaxLegacyInfo &Info) const { // FIXME: Type predicate on pattern if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) return false; @@ -121,8 +172,8 @@ } } -void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy( - MachineInstr &MI, const FMinFMaxLegacyInfo &Info) { +void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy( + MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const { 
B.setInstrAndDebugLoc(MI); auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) { B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); @@ -170,7 +221,8 @@ MI.eraseFromParent(); } -bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) { +bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat( + MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); // TODO: We could try to match extracting the higher bytes, which would be @@ -189,7 +241,8 @@ return false; } -void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) { +void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat( + MachineInstr &MI) const { B.setInstrAndDebugLoc(MI); const LLT S32 = LLT::scalar(32); @@ -202,19 +255,20 @@ SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0); if (Ty == S32) { - B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, - {SrcReg}, MI.getFlags()); + B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg}, + MI.getFlags()); } else { - auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, - {SrcReg}, MI.getFlags()); + auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg}, + MI.getFlags()); B.buildFPTrunc(DstReg, Cvt0, MI.getFlags()); } MI.eraseFromParent(); } -bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq( - MachineInstr &MI, std::function &MatchInfo) { +bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq( + MachineInstr &MI, + std::function &MatchInfo) const { auto getRcpSrc = [=](const MachineInstr &MI) { MachineInstr *ResMI = nullptr; @@ -257,8 +311,8 @@ return false; } -bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN( - MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) { +bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN( + MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const { Register SrcReg = MI.getOperand(1).getReg(); // Look through G_ZEXT. 
@@ -285,8 +339,8 @@ return false; } -void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN( - MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) { +void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN( + MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const { B.setInstrAndDebugLoc(MI); unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8; @@ -303,8 +357,8 @@ MI.eraseFromParent(); } -bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize( - MachineInstr &MI, Register &Reg) { +bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize( + MachineInstr &MI, Register &Reg) const { const SITargetLowering *TLI = static_cast( MF.getSubtarget().getTargetLowering()); Reg = MI.getOperand(1).getReg(); @@ -317,8 +371,8 @@ // instructions. // Identify buffer_load_{u8, u16}. -bool AMDGPUPostLegalizerCombinerHelper::matchCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) { +bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg( + MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { Register Op0Reg = MI.getOperand(1).getReg(); SubwordBufferLoad = MRI.getVRegDef(Op0Reg); @@ -333,8 +387,8 @@ // Combine buffer_load_{u8, u16} and the sign extension instruction to generate // buffer_load_{i8, i16}. -void AMDGPUPostLegalizerCombinerHelper::applyCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) { +void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg( + MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { // Modify the opcode and the destination of buffer_load_{u8, u16}: // Replace the opcode. 
unsigned Opc = @@ -350,49 +404,19 @@ MI.eraseFromParent(); } -class AMDGPUPostLegalizerCombinerHelperState { -protected: - AMDGPUCombinerHelper &Helper; - AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper; - - // Note: pointer is necessary because Target Predicates use - // "Subtarget->" - const GCNSubtarget *Subtarget; - -public: - AMDGPUPostLegalizerCombinerHelperState( - AMDGPUCombinerHelper &Helper, - AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper, - const GCNSubtarget &Subtarget) - : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper), - Subtarget(&Subtarget) {} -}; - -#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS -#include "AMDGPUGenPostLegalizeGICombiner.inc" -#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS - -namespace { -#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H -#include "AMDGPUGenPostLegalizeGICombiner.inc" -#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H - class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo { GISelKnownBits *KB; MachineDominatorTree *MDT; - const GCNSubtarget &Subtarget; + AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig; public: - AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; - - AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt, - bool OptSize, bool MinSize, + AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, const AMDGPULegalizerInfo *LI, GISelKnownBits *KB, MachineDominatorTree *MDT) : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), - KB(KB), MDT(MDT), Subtarget(Subtarget) { - if (!GeneratedRuleCfg.parseCommandLineOption()) + KB(KB), MDT(MDT) { + if (!RuleConfig.parseCommandLineOption()) report_fatal_error("Invalid rule identifier"); } @@ -405,11 +429,10 @@ MachineIRBuilder &B) const { AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, LInfo); - 
AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper); - AMDGPUGenPostLegalizerCombinerHelper Generated( - GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget); + AMDGPUPostLegalizerCombinerImpl PostLegalizerHelper(RuleConfig, B, Helper, + Observer); - if (Generated.tryCombineAll(Observer, MI, B)) + if (PostLegalizerHelper.tryCombineAll(MI)) return true; switch (MI.getOpcode()) { @@ -425,10 +448,6 @@ return false; } -#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP -#include "AMDGPUGenPostLegalizeGICombiner.inc" -#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP - // Pass boilerplate // ================ @@ -464,7 +483,7 @@ } AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone) - : MachineFunctionPass(ID), IsOptNone(IsOptNone) { + : MachineFunctionPass(ID), IsOptNone(IsOptNone) { initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); } @@ -478,13 +497,13 @@ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); const GCNSubtarget &ST = MF.getSubtarget(); - const AMDGPULegalizerInfo *LI - = static_cast(ST.getLegalizerInfo()); + const AMDGPULegalizerInfo *LI = + static_cast(ST.getLegalizerInfo()); GISelKnownBits *KB = &getAnalysis().get(MF); MachineDominatorTree *MDT = IsOptNone ? 
nullptr : &getAnalysis(); - AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(), + AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), LI, KB, MDT); Combiner C(PCInfo, TPC); return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); @@ -492,8 +511,8 @@ char AMDGPUPostLegalizerCombiner::ID = 0; INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, - "Combine AMDGPU machine instrs after legalization", - false, false) + "Combine AMDGPU machine instrs after legalization", false, + false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -20,28 +20,48 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Target/TargetMachine.h" +#define GET_GICOMBINER_DEPS +#include "AMDGPUGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_DEPS + #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" using namespace llvm; using namespace MIPatternMatch; +namespace { + +#define GET_GICOMBINER_TYPES +#include "AMDGPUGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TYPES -class AMDGPUPreLegalizerCombinerHelper { +class AMDGPUPreLegalizerCombinerImpl : public GIMatchTableExecutor { protected: + const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig; + const GCNSubtarget 
&STI; + + GISelChangeObserver &Observer; MachineIRBuilder &B; MachineFunction &MF; MachineRegisterInfo &MRI; AMDGPUCombinerHelper &Helper; public: - AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, - AMDGPUCombinerHelper &Helper) - : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; + AMDGPUPreLegalizerCombinerImpl( + const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, + const GCNSubtarget &STI, GISelChangeObserver &Observer, + MachineIRBuilder &B, AMDGPUCombinerHelper &Helper); + + static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; } + + bool tryCombineAll(MachineInstr &I) const; struct ClampI64ToI16MatchInfo { int64_t Cmp1 = 0; @@ -49,17 +69,46 @@ Register Origin; }; - bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineFunction &MF, - ClampI64ToI16MatchInfo &MatchInfo); + bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI, + const MachineFunction &MF, + ClampI64ToI16MatchInfo &MatchInfo) const; void applyClampI64ToI16(MachineInstr &MI, - const ClampI64ToI16MatchInfo &MatchInfo); + const ClampI64ToI16MatchInfo &MatchInfo) const; + +private: +#define GET_GICOMBINER_PREDICATES_DECL +#define AMDGPUSubtarget GCNSubtarget +#include "AMDGPUGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_PREDICATES_DECL +#undef AMDGPUSubtarget + +#define GET_GICOMBINER_TEMPORARIES_DECL +#include "AMDGPUGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TEMPORARIES_DECL }; -bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, - ClampI64ToI16MatchInfo &MatchInfo) { +#define GET_GICOMBINER_IMPL +#define AMDGPUSubtarget GCNSubtarget +#include "AMDGPUGenPreLegalizeGICombiner.inc" +#undef AMDGPUSubtarget +#undef GET_GICOMBINER_IMPL + +AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl( + const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, + const GCNSubtarget &STI, GISelChangeObserver &Observer, 
MachineIRBuilder &B, + AMDGPUCombinerHelper &Helper) + : RuleConfig(RuleConfig), STI(STI), Observer(Observer), B(B), MF(B.getMF()), + MRI(*B.getMRI()), Helper(Helper), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "AMDGPUGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16( + MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF, + ClampI64ToI16MatchInfo &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); // Try to find a pattern where an i64 value should get clamped to short. @@ -118,8 +167,8 @@ // This can be efficiently written as following: // v_cvt_pk_i16_i32 v0, v0, v1 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max -void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( - MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { +void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( + MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { Register Src = MatchInfo.Origin; assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == @@ -154,40 +203,18 @@ MI.eraseFromParent(); } -class AMDGPUPreLegalizerCombinerHelperState { -protected: - AMDGPUCombinerHelper &Helper; - AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; - -public: - AMDGPUPreLegalizerCombinerHelperState( - AMDGPUCombinerHelper &Helper, - AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) - : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} -}; - -#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS -#include "AMDGPUGenPreLegalizeGICombiner.inc" -#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS - -namespace { -#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H -#include "AMDGPUGenPreLegalizeGICombiner.inc" -#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H - class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { GISelKnownBits *KB; MachineDominatorTree 
*MDT; + AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig; public: - AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; - AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, - GISelKnownBits *KB, MachineDominatorTree *MDT) + GISelKnownBits *KB, MachineDominatorTree *MDT) : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), KB(KB), MDT(MDT) { - if (!GeneratedRuleCfg.parseCommandLineOption()) + if (!RuleConfig.parseCommandLineOption()) report_fatal_error("Invalid rule identifier"); } @@ -196,15 +223,16 @@ }; bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, - MachineInstr &MI, - MachineIRBuilder &B) const { + MachineInstr &MI, + MachineIRBuilder &B) const { const auto *LI = MI.getMF()->getSubtarget().getLegalizerInfo(); AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT, LI); - AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); - AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, - PreLegalizerHelper); - if (Generated.tryCombineAll(Observer, MI, B)) + const GCNSubtarget &STI = MI.getMF()->getSubtarget(); + AMDGPUPreLegalizerCombinerImpl PreLegalizerHelper(RuleConfig, STI, Observer, + B, Helper); + + if (PreLegalizerHelper.tryCombineAll(MI)) return true; switch (MI.getOpcode()) { @@ -217,10 +245,6 @@ return false; } -#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP -#include "AMDGPUGenPreLegalizeGICombiner.inc" -#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP - // Pass boilerplate // ================ @@ -237,6 +261,7 @@ bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + private: bool IsOptNone; }; @@ -259,7 +284,7 @@ } AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) - : MachineFunctionPass(ID), IsOptNone(IsOptNone) { + : MachineFunctionPass(ID), 
IsOptNone(IsOptNone) { initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -20,37 +20,55 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Target/TargetMachine.h" + +#define GET_GICOMBINER_DEPS +#include "AMDGPUGenRegBankGICombiner.inc" +#undef GET_GICOMBINER_DEPS + #define DEBUG_TYPE "amdgpu-regbank-combiner" using namespace llvm; using namespace MIPatternMatch; -class AMDGPURegBankCombinerHelper { +namespace { +#define GET_GICOMBINER_TYPES +#include "AMDGPUGenRegBankGICombiner.inc" +#undef GET_GICOMBINER_TYPES + +class AMDGPURegBankCombinerImpl : public GIMatchTableExecutor { protected: + const AMDGPURegBankCombinerImplRuleConfig &RuleConfig; + MachineIRBuilder &B; MachineFunction &MF; MachineRegisterInfo &MRI; - const GCNSubtarget &Subtarget; + const GCNSubtarget &STI; const RegisterBankInfo &RBI; const TargetRegisterInfo &TRI; const SIInstrInfo &TII; CombinerHelper &Helper; + GISelChangeObserver &Observer; public: - AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) - : B(B), MF(B.getMF()), MRI(*B.getMRI()), - Subtarget(MF.getSubtarget()), - RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()), - TII(*Subtarget.getInstrInfo()), Helper(Helper){}; + AMDGPURegBankCombinerImpl( + const 
AMDGPURegBankCombinerImplRuleConfig &RuleConfig, + MachineIRBuilder &B, CombinerHelper &Helper, + GISelChangeObserver &Observer); + + static const char *getName() { return "AMDGPURegBankCombinerImpl"; } + + bool tryCombineAll(MachineInstr &I) const; - bool isVgprRegBank(Register Reg); - Register getAsVgpr(Register Reg); + bool isVgprRegBank(Register Reg) const; + Register getAsVgpr(Register Reg) const; struct MinMaxMedOpc { unsigned Min, Max, Med; @@ -61,33 +79,62 @@ Register Val0, Val1, Val2; }; - MinMaxMedOpc getMinMaxPair(unsigned Opc); + MinMaxMedOpc getMinMaxPair(unsigned Opc) const; template bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, - Register &Val, CstTy &K0, CstTy &K1); + Register &Val, CstTy &K0, CstTy &K1) const; - bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); - bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); - bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg); - bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg); - void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); - void applyClamp(MachineInstr &MI, Register &Reg); + bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const; + bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const; + bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const; + bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const; + void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const; + void applyClamp(MachineInstr &MI, Register &Reg) const; private: - SIModeRegisterDefaults getMode(); - bool getIEEE(); - bool getDX10Clamp(); - bool isFminnumIeee(const MachineInstr &MI); - bool isFCst(MachineInstr *MI); - bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1); + SIModeRegisterDefaults getMode() const; + bool getIEEE() const; + bool getDX10Clamp() const; + bool isFminnumIeee(const MachineInstr &MI) const; + bool isFCst(MachineInstr *MI) const; + bool 
isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const; + +#define GET_GICOMBINER_PREDICATES_DECL +#define AMDGPUSubtarget GCNSubtarget +#include "AMDGPUGenRegBankGICombiner.inc" +#undef GET_GICOMBINER_PREDICATES_DECL +#undef AMDGPUSubtarget + +#define GET_GICOMBINER_TEMPORARIES_DECL +#include "AMDGPUGenRegBankGICombiner.inc" +#undef GET_GICOMBINER_TEMPORARIES_DECL }; -bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) { +#define GET_GICOMBINER_IMPL +#define AMDGPUSubtarget GCNSubtarget +#include "AMDGPUGenRegBankGICombiner.inc" +#undef AMDGPUSubtarget +#undef GET_GICOMBINER_IMPL + +AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl( + const AMDGPURegBankCombinerImplRuleConfig &RuleConfig, MachineIRBuilder &B, + CombinerHelper &Helper, GISelChangeObserver &Observer) + : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()), + STI(MF.getSubtarget()), RBI(*STI.getRegBankInfo()), + TRI(*STI.getRegisterInfo()), TII(*STI.getInstrInfo()), Helper(Helper), + Observer(Observer), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "AMDGPUGenRegBankGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const { return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID; } -Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) { +Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const { if (isVgprRegBank(Reg)) return Reg; @@ -104,8 +151,8 @@ return VgprReg; } -AMDGPURegBankCombinerHelper::MinMaxMedOpc -AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) { +AMDGPURegBankCombinerImpl::MinMaxMedOpc +AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Unsupported opcode"); @@ -126,10 +173,10 @@ } template -bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI, - MachineRegisterInfo &MRI, - MinMaxMedOpc MMMOpc, Register &Val, - CstTy &K0, CstTy &K1) { +bool 
AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI, + MachineRegisterInfo &MRI, + MinMaxMedOpc MMMOpc, Register &Val, + CstTy &K0, CstTy &K1) const { // 4 operand commutes of: min(max(Val, K0), K1). // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)). // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0). @@ -147,16 +194,15 @@ m_Cst(K0)))); } -bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3( - MachineInstr &MI, Med3MatchInfo &MatchInfo) { +bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3( + MachineInstr &MI, Med3MatchInfo &MatchInfo) const { Register Dst = MI.getOperand(0).getReg(); if (!isVgprRegBank(Dst)) return false; // med3 for i16 is only available on gfx9+, and not available for v2i16. LLT Ty = MRI.getType(Dst); - if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) && - Ty != LLT::scalar(32)) + if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32)) return false; MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); @@ -193,14 +239,13 @@ // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true) // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0 -bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3( - MachineInstr &MI, Med3MatchInfo &MatchInfo) { +bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3( + MachineInstr &MI, Med3MatchInfo &MatchInfo) const { Register Dst = MI.getOperand(0).getReg(); LLT Ty = MRI.getType(Dst); // med3 for f16 is only available on gfx9+, and not available for v2f16. 
- if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) && - Ty != LLT::scalar(32)) + if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32)) return false; auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); @@ -233,8 +278,8 @@ return false; } -bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI, - Register &Reg) { +bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI, + Register &Reg) const { // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16). auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); Register Val; @@ -269,8 +314,8 @@ // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0 -bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI, - Register &Reg) { +bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI, + Register &Reg) const { // In llvm-ir, clamp is often represented as an intrinsic call to // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders. 
MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); @@ -308,15 +353,16 @@ return false; } -void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) { +void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI, + Register &Reg) const { B.setInstrAndDebugLoc(MI); B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg}, MI.getFlags()); MI.eraseFromParent(); } -void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI, - Med3MatchInfo &MatchInfo) { +void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI, + Med3MatchInfo &MatchInfo) const { B.setInstrAndDebugLoc(MI); B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1), @@ -325,24 +371,26 @@ MI.eraseFromParent(); } -SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() { +SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const { return MF.getInfo()->getMode(); } -bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; } +bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; } -bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; } +bool AMDGPURegBankCombinerImpl::getDX10Clamp() const { + return getMode().DX10Clamp; +} -bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) { +bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const { return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE; } -bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) { +bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const { return MI->getOpcode() == AMDGPU::G_FCONSTANT; } -bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0, - MachineInstr *K1) { +bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0, + MachineInstr *K1) const { if (isFCst(K0) && isFCst(K1)) { const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm(); const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm(); @@ -352,40 
+400,19 @@ return false; } -class AMDGPURegBankCombinerHelperState { -protected: - CombinerHelper &Helper; - AMDGPURegBankCombinerHelper &RegBankHelper; - -public: - AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, - AMDGPURegBankCombinerHelper &RegBankHelper) - : Helper(Helper), RegBankHelper(RegBankHelper) {} -}; - -#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS -#include "AMDGPUGenRegBankGICombiner.inc" -#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS - -namespace { -#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H -#include "AMDGPUGenRegBankGICombiner.inc" -#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H - class AMDGPURegBankCombinerInfo final : public CombinerInfo { GISelKnownBits *KB; MachineDominatorTree *MDT; + AMDGPURegBankCombinerImplRuleConfig RuleConfig; public: - AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg; - AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, - const AMDGPULegalizerInfo *LI, - GISelKnownBits *KB, MachineDominatorTree *MDT) + const AMDGPULegalizerInfo *LI, GISelKnownBits *KB, + MachineDominatorTree *MDT) : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), KB(KB), MDT(MDT) { - if (!GeneratedRuleCfg.parseCommandLineOption()) + if (!RuleConfig.parseCommandLineOption()) report_fatal_error("Invalid rule identifier"); } @@ -394,23 +421,17 @@ }; bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer, - MachineInstr &MI, - MachineIRBuilder &B) const { + MachineInstr &MI, + MachineIRBuilder &B) const { CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT); - AMDGPURegBankCombinerHelper RegBankHelper(B, Helper); - AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper, - RegBankHelper); + AMDGPURegBankCombinerImpl RegBankHelper(RuleConfig, B, Helper, Observer); - if (Generated.tryCombineAll(Observer, MI, B)) + if (RegBankHelper.tryCombineAll(MI)) return 
true; return false; } -#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP -#include "AMDGPUGenRegBankGICombiner.inc" -#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP - // Pass boilerplate // ================ @@ -420,9 +441,7 @@ AMDGPURegBankCombiner(bool IsOptNone = false); - StringRef getPassName() const override { - return "AMDGPURegBankCombiner"; - } + StringRef getPassName() const override { return "AMDGPURegBankCombiner"; } bool runOnMachineFunction(MachineFunction &MF) override; @@ -446,7 +465,7 @@ } AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone) - : MachineFunctionPass(ID), IsOptNone(IsOptNone) { + : MachineFunctionPass(ID), IsOptNone(IsOptNone) { initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry()); } @@ -460,14 +479,14 @@ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); const GCNSubtarget &ST = MF.getSubtarget(); - const AMDGPULegalizerInfo *LI - = static_cast(ST.getLegalizerInfo()); + const AMDGPULegalizerInfo *LI = + static_cast(ST.getLegalizerInfo()); GISelKnownBits *KB = &getAnalysis().get(MF); MachineDominatorTree *MDT = IsOptNone ? 
nullptr : &getAnalysis(); - AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), - F.hasMinSize(), LI, KB, MDT); + AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), + LI, KB, MDT); Combiner C(PCInfo, TPC); return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); } diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -17,12 +17,12 @@ set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td) tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel) -tablegen(LLVM AMDGPUGenPreLegalizeGICombiner.inc -gen-global-isel-combiner - -combiners="AMDGPUPreLegalizerCombinerHelper") -tablegen(LLVM AMDGPUGenPostLegalizeGICombiner.inc -gen-global-isel-combiner - -combiners="AMDGPUPostLegalizerCombinerHelper") -tablegen(LLVM AMDGPUGenRegBankGICombiner.inc -gen-global-isel-combiner - -combiners="AMDGPURegBankCombinerHelper") +tablegen(LLVM AMDGPUGenPreLegalizeGICombiner.inc -gen-global-isel-combiner-matchtable + -combiners="AMDGPUPreLegalizerCombiner") +tablegen(LLVM AMDGPUGenPostLegalizeGICombiner.inc -gen-global-isel-combiner-matchtable + -combiners="AMDGPUPostLegalizerCombiner") +tablegen(LLVM AMDGPUGenRegBankGICombiner.inc -gen-global-isel-combiner-matchtable + -combiners="AMDGPURegBankCombiner") set(LLVM_TARGET_DEFINITIONS R600.td) tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)