diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -53,12 +53,16 @@ /// mapped to sequential loops. std::unique_ptr> createGpuMapParallelLoopsPass(); +/// Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect. +void populateGpuGlobalIdPatterns(RewritePatternSet &patterns); + /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect. void populateGpuAllReducePatterns(RewritePatternSet &patterns); /// Collect all patterns to rewrite ops within the GPU dialect. inline void populateGpuRewritePatterns(RewritePatternSet &patterns) { populateGpuAllReducePatterns(patterns); + populateGpuGlobalIdPatterns(patterns); } namespace gpu { diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -46,6 +46,7 @@ add_mlir_dialect_library(MLIRGPUTransforms Transforms/AllReduceLowering.cpp Transforms/AsyncRegionRewriter.cpp + Transforms/GlobalIdRewriter.cpp Transforms/KernelOutlining.cpp Transforms/MemoryPromotion.cpp Transforms/ParallelLoopMapper.cpp @@ -75,6 +76,7 @@ MLIRExecutionEngineUtils MLIRGPUOps MLIRIR + MLIRIndexDialect MLIRLLVMDialect MLIRGPUToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation diff --git a/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp @@ -0,0 +1,45 @@ +//===- GlobalIdRewriter.cpp - Implementation of GlobalId rewriting -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements in-dialect rewriting of the global_id op for archs +// where global_id.x = threadId.x + blockId.x * blockDim.x +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/Index/IR/IndexOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; + +namespace { +struct GpuGlobalIdRewriter : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(gpu::GlobalIdOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + auto dim = op.getDimension(); + auto blockId = rewriter.create(loc, dim); + auto blockDim = rewriter.create(loc, dim); + // Compute blockId.x * blockDim.x + auto tmp = rewriter.create(op.getLoc(), blockId, blockDim); + auto threadId = rewriter.create(loc, dim); + // Compute threadId.x + blockId.x * blockDim.x + rewriter.replaceOpWithNewOp(op, threadId, tmp); + return success(); + } +}; +} // namespace + +void mlir::populateGpuGlobalIdPatterns(RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); +} diff --git a/mlir/test/Dialect/GPU/globalId-rewrite.mlir b/mlir/test/Dialect/GPU/globalId-rewrite.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/GPU/globalId-rewrite.mlir @@ -0,0 +1,39 @@ +// RUN: mlir-opt --test-gpu-rewrite -split-input-file %s | FileCheck %s + +module { + // CHECK-LABEL: func.func @globalId + // CHECK-SAME: (%[[SZ:.*]]: index, %[[MEM:.*]]: memref) { + func.func @globalId(%sz : index, %mem: memref) { + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz) + threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) { + // CHECK: %[[BIDY:.*]] = gpu.block_id x + // CHECK-NEXT: %[[BDIMY:.*]] = gpu.block_dim x + // CHECK-NEXT: %[[TMPY:.*]] = index.mul %[[BIDY]], %[[BDIMY]] + // CHECK-NEXT: %[[TIDX:.*]] = gpu.thread_id x + // CHECK-NEXT: %[[GIDX:.*]] = index.add %[[TIDX]], %[[TMPY]] + %idx = gpu.global_id x + // CHECK: memref.store %[[GIDX]], %[[MEM]][] : memref + memref.store %idx, %mem[] : memref + + // CHECK: %[[BIDY:.*]] = gpu.block_id y + // CHECK-NEXT: %[[BDIMY:.*]] = gpu.block_dim y + // CHECK-NEXT: %[[TMPY:.*]] = index.mul %[[BIDY]], %[[BDIMY]] + // CHECK-NEXT: %[[TIDY:.*]] = gpu.thread_id y + // CHECK-NEXT: %[[GIDY:.*]] = index.add %[[TIDY]], %[[TMPY]] + %idy = gpu.global_id y + // CHECK: memref.store %[[GIDY]], %[[MEM]][] : memref + memref.store %idy, %mem[] : memref + + // CHECK: %[[BIDZ:.*]] = gpu.block_id z + // CHECK-NEXT: %[[BDIMZ:.*]] = gpu.block_dim z + // CHECK-NEXT: %[[TMPZ:.*]] = index.mul %[[BIDZ]], %[[BDIMZ]] + // CHECK-NEXT: %[[TIDZ:.*]] = gpu.thread_id z + // CHECK-NEXT: %[[GIDZ:.*]] = index.add %[[TIDZ]], %[[TMPZ]] + %idz = gpu.global_id z + // CHECK: memref.store %[[GIDZ]], %[[MEM]][] : memref + memref.store %idz, %mem[] : memref + gpu.terminator + } + return + } +} diff --git a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp --- a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp +++ b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/Index/IR/IndexDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -25,7 +26,7 @@ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestGpuRewritePass) void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); } StringRef getArgument() const final { return "test-gpu-rewrite"; }