diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
@@ -0,0 +1,22 @@
+//===- AMDGPUExportClustering.h - AMDGPU Export Clustering ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUEXPORTCLUSTERING_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUEXPORTCLUSTERING_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include <memory>
+
+namespace llvm {
+
+/// Creates a scheduling DAG mutation that clusters shader export instructions.
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUEXPORTCLUSTERING_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -0,0 +1,103 @@
+//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains a DAG scheduling mutation to cluster shader
+/// exports.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUExportClustering.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Scheduling DAG mutation that links dependent shader exports with cluster
+/// edges.
+class ExportClustering : public ScheduleDAGMutation {
+public:
+  ExportClustering() = default;
+  void apply(ScheduleDAGInstrs *DAG) override;
+};
+
+/// Returns true if \p SU wraps an EXP or EXP_DONE instruction.
+static bool isExport(const SUnit &SU) {
+  const unsigned Opcode = SU.getInstr()->getOpcode();
+  return Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE;
+}
+
+/// Adds a cluster edge between each pair of consecutive entries in \p Exports.
+static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
+  // A chain with fewer than two exports has nothing to link. Returning early
+  // also keeps the unsigned `size() - 1` bound below from wrapping around and
+  // avoids indexing into an empty array.
+  if (Exports.size() < 2)
+    return;
+
+  // Cluster a series of exports. Also copy all dependencies to the first
+  // export to avoid computation being inserted into the chain.
+  SUnit *ChainHead = Exports.front();
+  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
+    SUnit *SUa = Exports[Idx];
+    SUnit *SUb = Exports[Idx + 1];
+    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
+      continue;
+
+    for (const SDep &Pred : SUb->Preds) {
+      SUnit *PredSU = Pred.getSUnit();
+      if (Pred.isWeak() || isExport(*PredSU))
+        continue;
+      DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
+    }
+  }
+}
+
+/// Groups the exports of \p DAG into chains and clusters each chain.
+void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
+  SmallVector<SmallVector<SUnit *, 8>, 4> ExportChains;
+  DenseMap<unsigned, unsigned> ChainMap;
+
+  // Build chains of exports. An export joins the chain of its first
+  // non-artificial export predecessor, otherwise it starts a new chain.
+  for (SUnit &SU : DAG->SUnits) {
+    if (!isExport(SU))
+      continue;
+
+    unsigned ChainID = ExportChains.size();
+    for (const SDep &Pred : SU.Preds) {
+      const SUnit &PredSU = *Pred.getSUnit();
+      if (isExport(PredSU) && !Pred.isArtificial()) {
+        ChainID = ChainMap.lookup(PredSU.NodeNum);
+        break;
+      }
+    }
+    ChainMap[SU.NodeNum] = ChainID;
+
+    if (ChainID == ExportChains.size())
+      ExportChains.emplace_back();
+
+    ExportChains[ChainID].push_back(&SU);
+  }
+
+  // Apply clustering
+  for (auto &Chain : ExportChains)
+    buildCluster(Chain, DAG);
+}
+
+} // end namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
+  return std::make_unique<ExportClustering>();
+}
+
+} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUAliasAnalysis.h"
 #include "AMDGPUCallLowering.h"
+#include "AMDGPUExportClustering.h"
 #include "AMDGPUInstructionSelector.h"
 #include "AMDGPULegalizerInfo.h"
 #include "AMDGPUMacroFusion.h"
@@ -283,6 +284,7 @@
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
+  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
   return DAG;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -42,6 +42,7 @@
   AMDGPUAtomicOptimizer.cpp
   AMDGPUCallLowering.cpp
   AMDGPUCodeGenPrepare.cpp
+  AMDGPUExportClustering.cpp
   AMDGPUFixFunctionBitcasts.cpp
   AMDGPUFrameLowering.cpp
   AMDGPUHSAMetadataStreamer.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -542,14 +542,13 @@
 
 ; GCN-LABEL: {{^}}test_export_clustering:
 ; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
 ; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
 ; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
 ; GCN-DAG: v_add_f32_e32 [[Z0:v[0-9]+]]
-; GCN-DAG: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
 ; GCN-DAG: v_sub_f32_e32 [[Z1:v[0-9]+]]
-; GCN: s_waitcnt expcnt(0)
-; GCN: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
-; GCN: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
+; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
+; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
 define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
   %z0 = fadd float %x, %y
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)