diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -60,13 +60,5 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef mapping); } // namespace gpu - -/// Maps the parallel loops found in the given function to workgroups. The first -/// loop encountered will be mapped to the global workgroup and the second loop -/// encountered to the local workgroup. Within each mapping, the first three -/// dimensions are mapped to x/y/z hardware ids and all following dimensions are -/// mapped to sequential loops. -void greedilyMapParallelSCFToGPU(Region &region); - } // namespace mlir #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -39,6 +39,13 @@ /// Rewrites a function region so that GPU ops execute asynchronously. std::unique_ptr> createGpuAsyncRegionPass(); +/// Maps the parallel loops found in the given function to workgroups. The first +/// loop encountered will be mapped to the global workgroup and the second loop +/// encountered to the local workgroup. Within each mapping, the first three +/// dimensions are mapped to x/y/z hardware ids and all following dimensions are +/// mapped to sequential loops. +std::unique_ptr> createGpuMapParallelLoopsPass(); + /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect. 
void populateGpuAllReducePatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td --- a/mlir/include/mlir/Dialect/GPU/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Passes.td @@ -29,4 +29,11 @@ let dependentDialects = ["async::AsyncDialect"]; } +def GpuMapParallelLoopsPass + : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> { + let summary = "Greedily maps loops to GPU hardware dimensions."; + let constructor = "mlir::createGpuMapParallelLoopsPass()"; + let description = "Greedily maps loops to GPU hardware dimensions."; +} + #endif // MLIR_DIALECT_GPU_PASSES diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -13,26 +13,25 @@ #include "mlir/Dialect/GPU/ParallelLoopMapper.h" +#include "PassDetail.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/AffineMap.h" #include "mlir/Pass/Pass.h" -using namespace mlir; -using namespace mlir::gpu; -using namespace mlir::scf; - #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc" + namespace mlir { -namespace gpu { -StringRef getMappingAttrName() { return "mapping"; } +using scf::ParallelOp; -ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor, - AffineMap map, - AffineMap bound) { +StringRef gpu::getMappingAttrName() { return "mapping"; } + +gpu::ParallelLoopDimMapping +gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map, + AffineMap bound) { MLIRContext *context = map.getContext(); OpBuilder builder(context); return ParallelLoopDimMapping::get( @@ -40,8 +39,8 @@ AffineMapAttr::get(map), AffineMapAttr::get(bound), context); } -LogicalResult 
setMappingAttr(scf::ParallelOp ploopOp, - ArrayRef mapping) { +LogicalResult gpu::setMappingAttr(ParallelOp ploopOp, + ArrayRef mapping) { // Verify that each processor is mapped to only once. llvm::DenseSet specifiedMappings; for (auto dimAttr : mapping) { @@ -56,20 +55,17 @@ ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs)); return success(); } -} // namespace gpu -} // namespace mlir +namespace gpu { namespace { - enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 }; +} // namespace static constexpr int kNumHardwareIds = 3; -} // namespace - /// Bounded increment on MappingLevel. Increments to the next /// level unless Sequential was already reached. -MappingLevel &operator++(MappingLevel &mappingLevel) { +static MappingLevel &operator++(MappingLevel &mappingLevel) { if (mappingLevel < Sequential) { mappingLevel = static_cast(mappingLevel + 1); } @@ -82,8 +78,7 @@ /// TODO: Make this use x for the inner-most loop that is /// distributed to map to x, the next innermost to y and the next innermost to /// z. 
-static gpu::Processor getHardwareIdForMapping(MappingLevel level, - int dimension) { +static Processor getHardwareIdForMapping(MappingLevel level, int dimension) { if (dimension >= kNumHardwareIds || level == Sequential) return Processor::Sequential; @@ -145,6 +140,23 @@ } } -void mlir::greedilyMapParallelSCFToGPU(Region &region) { - region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); +namespace { +struct GpuMapParallelLoopsPass + : public GpuMapParallelLoopsPassBase { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(GpuMapParallelLoopsPass) + + void runOnOperation() override { + for (Region &region : getOperation()->getRegions()) { + region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); + } + } +}; + +} // namespace +} // namespace gpu +} // namespace mlir + +std::unique_ptr> +mlir::createGpuMapParallelLoopsPass() { + return std::make_unique(); } diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir --- a/mlir/test/Dialect/GPU/mapping.mlir +++ b/mlir/test/Dialect/GPU/mapping.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s +// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) { diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt @@ -3,7 +3,6 @@ TestConvertGPUKernelToCubin.cpp TestConvertGPUKernelToHsaco.cpp TestGpuMemoryPromotion.cpp - TestGpuParallelLoopMapping.cpp TestGpuRewrite.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp deleted file mode 100644 --- a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===- TestGPUParallelLoopMapping.cpp - Test pass 
for GPU loop mapping ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the pass testing the utilities for mapping parallel -// loops to gpu hardware ids. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/ParallelLoopMapper.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; - -namespace { -/// Simple pass for testing the mapping of parallel loops to hardware ids using -/// a greedy mapping strategy. -struct TestGpuGreedyParallelLoopMappingPass - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( - TestGpuGreedyParallelLoopMappingPass) - - StringRef getArgument() const final { - return "test-gpu-greedy-parallel-loop-mapping"; - } - StringRef getDescription() const final { - return "Greedily maps all parallel loops to gpu hardware ids."; - } - void runOnOperation() override { - for (Region &region : getOperation()->getRegions()) - greedilyMapParallelSCFToGPU(region); - } -}; -} // namespace - -namespace mlir { -namespace test { -void registerTestGpuParallelLoopMappingPass() { - PassRegistration(); -} -} // namespace test -} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -79,7 +79,6 @@ void registerTestExpandTanhPass(); void registerTestComposeSubView(); void registerTestMultiBuffering(); -void registerTestGpuParallelLoopMappingPass(); void registerTestIRVisitorsPass(); void registerTestGenericIRVisitorsPass(); void registerTestGenericIRVisitorsInterruptPass(); @@ -176,7 +175,6 @@ mlir::test::registerTestExpandTanhPass(); mlir::test::registerTestComposeSubView(); 
mlir::test::registerTestMultiBuffering(); - mlir::test::registerTestGpuParallelLoopMappingPass(); mlir::test::registerTestIRVisitorsPass(); mlir::test::registerTestGenericIRVisitorsPass(); mlir::test::registerTestInterfaces();