diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -60,13 +60,5 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp, ArrayRef mapping); } // namespace gpu - -/// Maps the parallel loops found in the given function to workgroups. The first -/// loop encountered will be mapped to the global workgroup and the second loop -/// encountered to the local workgroup. Within each mapping, the first three -/// dimensions are mapped to x/y/z hardware ids and all following dimensions are -/// mapped to sequential loops. -void greedilyMapParallelSCFToGPU(Region &region); - } // namespace mlir #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -39,6 +39,13 @@ /// Rewrites a function region so that GPU ops execute asynchronously. std::unique_ptr> createGpuAsyncRegionPass(); +/// Maps the parallel loops found in the given function to workgroups. The first +/// loop encountered will be mapped to the global workgroup and the second loop +/// encountered to the local workgroup. Within each mapping, the first three +/// dimensions are mapped to x/y/z hardware ids and all following dimensions are +/// mapped to sequential loops. +std::unique_ptr> createGpuMapParallelLoopsPass(); + /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect. 
void populateGpuAllReducePatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td --- a/mlir/include/mlir/Dialect/GPU/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Passes.td @@ -29,4 +29,11 @@ let dependentDialects = ["async::AsyncDialect"]; } +def GpuMapParallelLoopsPass + : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> { + let summary = "Greedily maps loops to GPU hardware dimensions."; + let constructor = "mlir::createGpuMapParallelLoopsPass()"; + let description = "Greedily maps loops to GPU hardware dimensions."; +} + #endif // MLIR_DIALECT_GPU_PASSES diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/GPU/ParallelLoopMapper.h" +#include "PassDetail.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/SCF/SCF.h" @@ -145,6 +146,19 @@ } } -void mlir::greedilyMapParallelSCFToGPU(Region &region) { - region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); +namespace { +struct GpuMapParallelLoopsPass + : public GpuMapParallelLoopsPassBase { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(GpuMapParallelLoopsPass) + + void runOnOperation() override { + for (Region &region : getOperation()->getRegions()) + region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); + } +}; +} // namespace + +std::unique_ptr> +mlir::createGpuMapParallelLoopsPass() { + return std::make_unique(); } diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir --- a/mlir/test/Dialect/GPU/mapping.mlir +++ b/mlir/test/Dialect/GPU/mapping.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s +// RUN: mlir-opt 
-gpu-map-parallel-loops -split-input-file %s | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) { diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt @@ -3,7 +3,6 @@ TestConvertGPUKernelToCubin.cpp TestConvertGPUKernelToHsaco.cpp TestGpuMemoryPromotion.cpp - TestGpuParallelLoopMapping.cpp TestGpuRewrite.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp deleted file mode 100644 --- a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===- TestGPUParallelLoopMapping.cpp - Test pass for GPU loop mapping ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the pass testing the utilities for mapping parallel -// loops to gpu hardware ids. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/ParallelLoopMapper.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; - -namespace { -/// Simple pass for testing the mapping of parallel loops to hardware ids using -/// a greedy mapping strategy. 
-struct TestGpuGreedyParallelLoopMappingPass - : public PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( - TestGpuGreedyParallelLoopMappingPass) - - StringRef getArgument() const final { - return "test-gpu-greedy-parallel-loop-mapping"; - } - StringRef getDescription() const final { - return "Greedily maps all parallel loops to gpu hardware ids."; - } - void runOnOperation() override { - for (Region &region : getOperation()->getRegions()) - greedilyMapParallelSCFToGPU(region); - } -}; -} // namespace - -namespace mlir { -namespace test { -void registerTestGpuParallelLoopMappingPass() { - PassRegistration(); -} -} // namespace test -} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -79,7 +79,6 @@ void registerTestExpandTanhPass(); void registerTestComposeSubView(); void registerTestMultiBuffering(); -void registerTestGpuParallelLoopMappingPass(); void registerTestIRVisitorsPass(); void registerTestGenericIRVisitorsPass(); void registerTestGenericIRVisitorsInterruptPass(); @@ -176,7 +175,6 @@ mlir::test::registerTestExpandTanhPass(); mlir::test::registerTestComposeSubView(); mlir::test::registerTestMultiBuffering(); - mlir::test::registerTestGpuParallelLoopMappingPass(); mlir::test::registerTestIRVisitorsPass(); mlir::test::registerTestGenericIRVisitorsPass(); mlir::test::registerTestInterfaces();