Index: llvm/lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.h
+++ llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -27,6 +27,12 @@
 class PassRegistry;
 class Module;
 
+// GlobalISel passes
+void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
+/// Creates the pre-legalizer GlobalISel combiner pass. When \p IsOptNone is
+/// true the pass does not request the MachineDominatorTree analysis.
+FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
+
 // R600 Passes
 FunctionPass *createR600VectorRegMerger();
 FunctionPass *createR600ExpandSpecialInstrsPass();
Index: llvm/lib/Target/AMDGPU/AMDGPUCombine.td
===================================================================
--- /dev/null
+++ llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -0,0 +1,15 @@
+//=- AMDGPUCombine.td - Define AMDGPU Combine Rules ----------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
+  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
+                                          elide_br_by_inverting_cond]> {
+  let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 include "AMDGPU.td"
+include "AMDGPUCombine.td"
 
 def sd_vsrc0 : ComplexPattern<i32, 1, "">;
 def gi_vsrc0 :
Index: llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -0,0 +1,160 @@
+//=== lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AMDGPUGenGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AMDGPUGenGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+/// Combiner configuration used before legalization. combine() tries the
+/// TableGen-generated rules first, then the hand-written vector combines.
+class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo {
+  GISelKnownBits *KB;
+  /// Null when the owning pass was constructed with IsOptNone set.
+  MachineDominatorTree *MDT;
+
+public:
+  AMDGPUGenPreLegalizerCombinerHelper Generated;
+
+  AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+                                 GISelKnownBits *KB, MachineDominatorTree *MDT)
+      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
+        KB(KB), MDT(MDT) {
+    // A failed parse of the generated helper's command line option is fatal.
+    if (!Generated.parseCommandLineOption())
+      report_fatal_error("Invalid rule identifier");
+  }
+
+  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+               MachineIRBuilder &B) const override;
+};
+
+/// Tries the generated combines on \p MI, then the concat-vectors and
+/// shuffle-vector helpers. Returns true as soon as one of them succeeds.
+bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+                                             MachineInstr &MI,
+                                             MachineIRBuilder &B) const {
+  CombinerHelper Helper(Observer, B, KB, MDT);
+
+  if (Generated.tryCombineAll(Observer, MI, B, Helper))
+    return true;
+
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return Helper.tryCombineConcatVectors(MI);
+  case TargetOpcode::G_SHUFFLE_VECTOR:
+    return Helper.tryCombineShuffleVector(MI);
+  }
+
+  return false;
+}
+
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AMDGPUGenGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
+
+  StringRef getPassName() const override { return "AMDGPUPreLegalizerCombiner"; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  /// When set, the dominator tree is neither requested nor used.
+  bool IsOptNone;
+};
+} // end anonymous namespace
+
+void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  AU.setPreservesCFG();
+  getSelectionDAGFallbackAnalysisUsage(AU);
+  AU.addRequired<GISelKnownBitsAnalysis>();
+  AU.addPreserved<GISelKnownBitsAnalysis>();
+  // Must stay in sync with the MDT lookup in runOnMachineFunction().
+  if (!IsOptNone) {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+  }
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
+    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+  // Leave functions already marked FailedISel untouched.
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+  auto *TPC = &getAnalysis<TargetPassConfig>();
+  const Function &F = MF.getFunction();
+  bool EnableOpt =
+      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+  // The dominator tree is only declared as required when !IsOptNone.
+  MachineDominatorTree *MDT =
+      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+  AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+                                        F.hasMinSize(), KB, MDT);
+  Combiner C(PCInfo, TPC);
+  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AMDGPUPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
+                      "Combine AMDGPU machine instrs before legalization",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
+                    "Combine AMDGPU machine instrs before legalization", false,
+                    false)
+
+namespace llvm {
+FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
+  return new AMDGPUPreLegalizerCombiner(IsOptNone);
+}
+} // end namespace llvm
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -217,6 +217,7 @@
   initializeAMDGPULowerKernelAttributesPass(*PR);
   initializeAMDGPULowerIntrinsicsPass(*PR);
   initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
+  initializeAMDGPUPreLegalizerCombinerPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeAMDGPUCodeGenPreparePass(*PR);
   initializeAMDGPUPropagateAttributesEarlyPass(*PR);
@@ -617,6 +618,7 @@
   bool addILPOpts() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
+  void addPreLegalizeMachineIR() override;
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
   bool addGlobalInstructionSelect() override;
@@ -895,6 +897,12 @@
   return false;
 }
 
+// Adds the pre-legalizer combiner, telling it whether optimization is off.
+void GCNPassConfig::addPreLegalizeMachineIR() {
+  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
+}
+
 bool GCNPassConfig::addLegalizeMachineIR() {
   addPass(new Legalizer());
   return false;
Index: llvm/lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -15,6 +15,8 @@
 
 set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
 tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
+tablegen(LLVM AMDGPUGenGICombiner.inc -gen-global-isel-combiner
+              -combiners="AMDGPUPreLegalizerCombinerHelper")
 
 set(LLVM_TARGET_DEFINITIONS R600.td)
 tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)
@@ -58,6 +60,7 @@
   AMDGPUMacroFusion.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUOpenCLEnqueuedBlockLowering.cpp
+  AMDGPUPreLegalizerCombiner.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPUPropagateAttributes.cpp
   AMDGPURegisterBankInfo.cpp