diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -18,12 +18,9 @@ include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" -// Type constraint accepting standard integers, indices and wrapped LLVM integer -// types. -def IntLikeOrLLVMInt : TypeConstraint< - Or<[AnySignlessInteger.predicate, Index.predicate, - LLVM_AnyInteger.predicate]>, - "integer, index or LLVM dialect equivalent">; +// Type constraint accepting standard integers and indices. +def IntOrIndex : TypeConstraint< + Or<[AnySignlessInteger.predicate, Index.predicate]>, "integer or index">; //===----------------------------------------------------------------------===// // GPU Dialect operations. @@ -299,9 +296,9 @@ } def GPU_LaunchFuncOp : GPU_Op<"launch_func">, - Arguments<(ins IntLikeOrLLVMInt:$gridSizeX, IntLikeOrLLVMInt:$gridSizeY, - IntLikeOrLLVMInt:$gridSizeZ, IntLikeOrLLVMInt:$blockSizeX, - IntLikeOrLLVMInt:$blockSizeY, IntLikeOrLLVMInt:$blockSizeZ, + Arguments<(ins IntOrIndex:$gridSizeX, IntOrIndex:$gridSizeY, + IntOrIndex:$gridSizeZ, IntOrIndex:$blockSizeX, + IntOrIndex:$blockSizeY, IntOrIndex:$blockSizeZ, Variadic:$operands)>, Results<(outs)> { let summary = "Launches a function as a GPU kerneel"; @@ -333,7 +330,7 @@ // This module creates a separate compilation unit for the GPU compiler. gpu.module @kernels { - func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">) + func @kernel_1(%arg0 : f32, %arg1 : memref) attributes { nvvm.kernel = true } { // Operations that produce block/thread IDs and dimensions are @@ -365,7 +362,7 @@ %arg0, %arg1) // Arguments passed to the kernel. { kernel_module = @kernels, // Module containing the kernel. kernel = "kernel_1" } // Kernel function. 
- : (index, index, index, index, index, index, f32, !llvm<"float*">) + : (index, index, index, index, index, index, f32, memref) -> () } ``` diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir --- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir @@ -3,34 +3,48 @@ module attributes {gpu.container_module} { - // CHECK: llvm.mlir.global internal constant @[[kernel_name:.*]]("kernel\00") - // CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN") - // ROCDL: llvm.mlir.global internal constant @[[global:.*]]("HSACO") + // CHECK: llvm.mlir.global internal constant @[[KERNEL_NAME:.*]]("kernel\00") + // CHECK: llvm.mlir.global internal constant @[[GLOBAL:.*]]("CUBIN") + // ROCDL: llvm.mlir.global internal constant @[[GLOBAL:.*]]("HSACO") - gpu.module @kernel_module attributes {nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"} { - llvm.func @kernel(%arg0: !llvm.float, %arg1: !llvm.ptr) attributes {gpu.kernel} { + gpu.module @kernel_module attributes { + nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO" + } { + llvm.func @kernel(%arg0: !llvm.i32, %arg1: !llvm.ptr, + %arg2: !llvm.ptr, %arg3: !llvm.i64, %arg4: !llvm.i64, + %arg5: !llvm.i64) attributes {gpu.kernel} { llvm.return } } - llvm.func @foo() { - %0 = "op"() : () -> (!llvm.float) - %1 = "op"() : () -> (!llvm.ptr) - %cst = llvm.mlir.constant(8 : index) : !llvm.i64 - - // CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]] - // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) - // CHECK: %[[binary:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]] - // CHECK-SAME: -> !llvm.ptr - // CHECK: %[[module:.*]] = llvm.call @mgpuModuleLoad(%[[binary]]) : (!llvm.ptr) -> !llvm.ptr - // CHECK: %[[func:.*]] = llvm.call @mgpuModuleGetFunction(%[[module]], {{.*}}) : (!llvm.ptr, !llvm.ptr) -> !llvm.ptr 
- // CHECK: llvm.call @mgpuStreamCreate - // CHECK: llvm.call @mgpuLaunchKernel - // CHECK: llvm.call @mgpuStreamSynchronize - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_module::@kernel } - : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.float, !llvm.ptr) -> () - - llvm.return + func @foo(%buffer: memref) { + %c8 = constant 8 : index + %c32 = constant 32 : i32 + "gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8, %c32, %buffer) { + kernel = @kernel_module::@kernel + } : (index, index, index, index, index, index, i32, memref) -> () + return } + // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 + // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]] + // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index) + // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}[[C0]], [[C0]]] + // CHECK-SAME: -> !llvm.ptr + + // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]]) + // CHECK: [[FUNC:%.*]] = llvm.call @mgpuModuleGetFunction([[MODULE]], {{.*}}) + + // CHECK: [[C0_I32:%.*]] = llvm.mlir.constant(0 : i32) + // CHECK: [[STREAM:%.*]] = llvm.call @mgpuStreamCreate + + // CHECK: [[NUM_PARAMS:%.*]] = llvm.mlir.constant(6 : i32) : !llvm.i32 + // CHECK-NEXT: [[PARAMS:%.*]] = llvm.alloca [[NUM_PARAMS]] x !llvm.ptr + + // CHECK: [[EXTRA_PARAMS:%.*]] = llvm.mlir.null : !llvm.ptr> + + // CHECK: llvm.call @mgpuLaunchKernel([[FUNC]], [[C8]], [[C8]], [[C8]], + // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C0_I32]], [[STREAM]], + // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]]) + // CHECK: llvm.call @mgpuStreamSynchronize }