Add a rocdl.barrier op and lower gpu.barrier to it.

* ROCDLOps.td: defines ROCDL_BarrierOp ("rocdl.barrier"). Its llvmBuilder emits
  a release fence with sync scope "workgroup", a call to the amdgcn_s_barrier
  intrinsic, and then an acquire fence with sync scope "workgroup".
* GPUToROCDL.td (new file) and CMakeLists.txt: declare the
  GPU_BarrierOp -> ROCDL_BarrierOp pattern and generate GPUToROCDL.cpp.inc
  with mlir_tablegen -gen-rewriters.
* LowerGpuOpsToROCDLOps.cpp: includes the generated patterns and registers
  them through populateWithGenerated.
* Tests: Conversion/GPUToROCDL/gpu-to-rocdl.mlir, Dialect/LLVMIR/rocdl.mlir and
  Target/rocdl.mlir each gain a barrier case.

NOTE(review): the line breaks and indentation inside the hunks below have been
collapsed to single spaces, and the context following "patterns.insert<" looks
truncated (presumably the template arguments of GPUIndexIntrinsicOpLowering
were lost -- confirm against the tree). Regenerate the patch from the source
tree before applying it.

diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -87,5 +87,19 @@ def ROCDL_GridDimZOp : ROCDL_DeviceFunctionOp<"grid.dim.z", "__ockl_get_global_size", 2>; +//===----------------------------------------------------------------------===// +// Synchronization primitives + +def ROCDL_BarrierOp : ROCDL_Op<"barrier"> { + string llvmBuilder = [{ + llvm::LLVMContext &llvmContext = builder.getContext(); + builder.CreateFence(llvm::AtomicOrdering::Release, + llvmContext.getOrInsertSyncScopeID("workgroup")); + createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier); + builder.CreateFence(llvm::AtomicOrdering::Acquire, + llvmContext.getOrInsertSyncScopeID("workgroup")); + }]; + let assemblyFormat = "attr-dict"; +} #endif // ROCDLIR_OPS diff --git a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt --- a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt @@ -1,9 +1,15 @@ +set(LLVM_TARGET_DEFINITIONS GPUToROCDL.td) +mlir_tablegen(GPUToROCDL.cpp.inc -gen-rewriters) +add_public_tablegen_target(MLIRGPUToROCDLIncGen) + add_mlir_conversion_library(MLIRGPUtoROCDLTransforms LowerGpuOpsToROCDLOps.cpp DEPENDS MLIRConversionPassIncGen + MLIRGPUToROCDLIncGen ) + target_link_libraries(MLIRGPUtoROCDLTransforms PUBLIC LLVMSupport diff --git a/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td b/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td new file mode 100644 --- /dev/null +++ b/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td @@ -0,0 +1,21 @@ +//==-- GPUToROCDL.td - GPU Ops to ROCDL Patterns -------------*- tablegen -*==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines declarative rewrite patterns that lower GPU ops to ROCDL ops. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUTOROCDL_TD +#define MLIR_CONVERSION_GPUTOROCDL_TD + +include "mlir/Dialect/GPU/GPUOps.td" +include "mlir/Dialect/LLVMIR/ROCDLOps.td" + +def : Pat<(GPU_BarrierOp), (ROCDL_BarrierOp)>; + +#endif // MLIR_CONVERSION_GPUTOROCDL_TD diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -32,6 +32,9 @@ namespace { +/// Import the GPU-to-ROCDL rewrite patterns generated from GPUToROCDL.td. +#include "GPUToROCDL.cpp.inc" + // A pass that replaces all occurrences of GPU device operations with their // corresponding ROCDL equivalent. 
// @@ -71,6 +74,7 @@ void mlir::populateGpuToROCDLConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { + populateWithGenerated(converter.getDialect()->getContext(), &patterns); patterns.insert< GPUIndexIntrinsicOpLowering, diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -43,6 +43,17 @@ // ----- gpu.module @test_module { + // CHECK-LABEL: func @gpu_sync() + func @gpu_sync() { + // CHECK: rocdl.barrier + gpu.barrier + std.return + } +} + +// ----- + +gpu.module @test_module { // CHECK: llvm.func @__ocml_fabs_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_fabs_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_fabs diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -28,3 +28,9 @@ %11 = rocdl.grid.dim.z : !llvm.i32 llvm.return %0 : !llvm.i32 } + +func @rocdl.barrier() { + // CHECK: rocdl.barrier + rocdl.barrier + llvm.return +} diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -33,3 +33,11 @@ // CHECK-LABEL: amdgpu_kernel void @kernel_func llvm.return } + +llvm.func @rocdl.barrier() { + // CHECK: fence syncscope("workgroup") release + // CHECK-NEXT: call void @llvm.amdgcn.s.barrier() + // CHECK-NEXT: fence syncscope("workgroup") acquire + rocdl.barrier + llvm.return +}