diff --git a/mlir/include/mlir/Dialect/EmitC/CMakeLists.txt b/mlir/include/mlir/Dialect/EmitC/CMakeLists.txt --- a/mlir/include/mlir/Dialect/EmitC/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/EmitC/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCBase.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitCBase.td --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitCBase.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitCBase.td @@ -31,6 +31,15 @@ let hasConstantMaterializer = 1; let useDefaultTypePrinterParser = 1; let useDefaultAttributePrinterParser = 1; + let hasOperationAttrVerify = 1; + let extraClassDeclaration = [{ + /// Get the name of the attribute used for emitc function attributes. + static StringRef getFuncAttrName() { return "emitc.func_attr"; } + /// Get the attribute string used for GPU kernels. + static StringRef getKernelAttrString() { return "__global__"; } + /// Get the attribute string used for GPU device functions + static StringRef getDeviceFuncAttrString() { return "__device__"; } + }]; } #endif // MLIR_DIALECT_EMITC_IR_EMITCBASE diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTypes.td @@ -72,4 +72,14 @@ let assemblyFormat = "`<` qualified($pointee) `>`"; } +def EmitC_StringType : EmitC_Type<"String", "string"> { + let summary = "EmitC string type"; + + let description = [{ + A string data type used for function attributes. + }]; + + let parameters = (ins); +} + #endif // MLIR_DIALECT_EMITC_IR_EMITCTYPES diff --git a/mlir/include/mlir/Dialect/EmitC/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/EmitC/Transforms/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/EmitC/Transforms/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name EmitC) +add_public_tablegen_target(MLIREmitCTransformsIncGen) + +add_mlir_doc(Passes EmitCPasses ./ -gen-pass-doc) diff --git a/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.h b/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.h @@ -0,0 +1,38 @@ +//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_EMITC_TRANSFORMS_PASSES_H_ +#define MLIR_DIALECT_EMITC_TRANSFORMS_PASSES_H_ + +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace emitc { + +#define GEN_PASS_DECL +#include "mlir/Dialect/EmitC/Transforms/Passes.h.inc" + +/// Add patterns to for appending GPU function attribute strings. +void populateEmitCAddGPUFuncAttributesPatterns(RewritePatternSet &patterns); + +/// Create a pass to add function attribute strings ("__global__" or +/// "__device__") to GPU functions in preparation for C code generation. +std::unique_ptr createEmitCAddGPUFuncAttributesPass(); + +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/EmitC/Transforms/Passes.h.inc" + +} // namespace emitc +} // namespace mlir + +#endif // MLIR_DIALECT_EMITC_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td b/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td @@ -0,0 +1,30 @@ +//===-- Passes.td - EmitC pass definition file -------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_EMITC_TRANSFORMS_PASSES +#define MLIR_DIALECT_EMITC_TRANSFORMS_PASSES + +include "mlir/Pass/PassBase.td" + +def EmitCAddGPUFuncAttributes : Pass<"emitc-add-gpu-func-attributes", + "ModuleOp"> { + let summary = "Add string attributes for GPU functions"; + let description = [{ + Add strings to the (discardable) emitc.func_attr attribute corresponding to + the appropriate gpu.func type (kernel or device). For common GPU kernel + dialects (e.g. NVIDIA CUDA and AMD HIP, kernel functions are annotated with + the __global__ specifier, and device functions are annotated with the + __device__ specifier. This pass adds the corresponding strings to the + (discardable) emitc.func_attr attribute of any gpu.func operations that + are encountered. + }]; + let constructor = "mlir::emitc::createEmitCAddGPUFuncAttributesPass()"; + let dependentDialects = ["emitc::EmitCDialect"]; +} + +#endif // MLIR_DIALECT_EMITC_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h @@ -22,6 +22,7 @@ #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferIntRangeInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -20,6 +20,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/IR/FunctionInterfaces.td" include "mlir/IR/SymbolInterfaces.td" +include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/DataLayoutInterfaces.td" include "mlir/Interfaces/InferIntRangeInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" @@ -656,7 +657,7 @@ } def GPU_ReturnOp : GPU_Op<"return", [HasParent<"GPUFuncOp">, Pure, - Terminator]>, + Terminator, ReturnLike]>, Arguments<(ins Variadic:$operands)>, Results<(outs)> { let summary = "Terminator for GPU functions."; let description = [{ diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Dialect/Async/Passes.h" #include "mlir/Dialect/Bufferization/Transforms/Passes.h" +#include "mlir/Dialect/EmitC/Transforms/Passes.h" #include "mlir/Dialect/Func/Transforms/Passes.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/LLVMIR/Transforms/Passes.h" @@ -59,6 +60,7 @@ registerAsyncPasses(); arith::registerArithPasses(); bufferization::registerBufferizationPasses(); + emitc::registerEmitCPasses(); func::registerFuncPasses(); registerGPUPasses(); registerGpuSerializeToCubinPass(); diff --git a/mlir/lib/Dialect/EmitC/CMakeLists.txt b/mlir/lib/Dialect/EmitC/CMakeLists.txt --- a/mlir/lib/Dialect/EmitC/CMakeLists.txt +++ b/mlir/lib/Dialect/EmitC/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -44,6 +44,20 @@ return builder.create(loc, type, value); } +LogicalResult EmitCDialect::verifyOperationAttribute(Operation *op, + NamedAttribute attr) { + if (attr.getName() != getFuncAttrName()) + return op->emitError() << "attribute '" << attr.getName() + << "' not supported by the emitc dialect"; + auto stringElem = dyn_cast(attr.getValue()); + if (!stringElem || !stringElem.getType().hasRank() || + (1 != stringElem.getType().getRank())) + return op->emitError() << "attribute '" << attr.getName() + << "' must be of type DenseStringElements " + << "with rank 1"; + return success(); +} + //===----------------------------------------------------------------------===// // ApplyOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/EmitC/Transforms/AddGPUFuncAttributes.cpp b/mlir/lib/Dialect/EmitC/Transforms/AddGPUFuncAttributes.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/EmitC/Transforms/AddGPUFuncAttributes.cpp @@ -0,0 +1,57 @@ +//===- AddGPUFuncAttributes.cpp - Pass to add GPU function attributes -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/EmitC/Transforms/Passes.h" + +#include "mlir/Dialect/EmitC/IR/EmitC.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" + +namespace mlir { +namespace emitc { +#define GEN_PASS_DEF_EMITCADDGPUFUNCATTRIBUTES +#include "mlir/Dialect/EmitC/Transforms/Passes.h.inc" +} // namespace emitc +} // namespace mlir + +using namespace mlir; +using namespace mlir::emitc; + +namespace { + +struct EmitCAddGPUFuncAttributesPass + : public emitc::impl::EmitCAddGPUFuncAttributesBase< + EmitCAddGPUFuncAttributesPass> { + void runOnOperation() override { + ModuleOp module = getOperation(); + module.walk([&](gpu::GPUFuncOp gpuFunc) { + SmallVector newAttrs; + // Retain any previously existing function attribute strings. + auto prevAttrs = gpuFunc->getAttrOfType( + EmitCDialect::getFuncAttrName()); + if (prevAttrs) { + newAttrs.append(prevAttrs.getRawStringData().begin(), + prevAttrs.getRawStringData().end()); + } + // Add a function attribute string based on whether or not the GPU + // function is a kernel. + newAttrs.push_back(gpuFunc.isKernel() + ? EmitCDialect::getKernelAttrString() + : EmitCDialect::getDeviceFuncAttrString()); + auto attrType = + RankedTensorType::get({static_cast(newAttrs.size())}, + emitc::StringType::get(gpuFunc->getContext())); + gpuFunc->setAttr(EmitCDialect::getFuncAttrName(), + DenseStringElementsAttr::get(attrType, newAttrs)); + }); + } +}; +} // end anonymous namespace + +std::unique_ptr mlir::emitc::createEmitCAddGPUFuncAttributesPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/EmitC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/EmitC/Transforms/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/EmitC/Transforms/CMakeLists.txt @@ -0,0 +1,17 @@ +add_mlir_dialect_library(MLIREmitCTransforms + AddGPUFuncAttributes.cpp + + ADDITIONAL_HEADER_DIRS + {$MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/EmitC/Transforms + + DEPENDS + MLIREmitCTransformsIncGen + + LINK_LIBS PUBLIC + MLIREmitCDialect + MLIRGPUOps + MLIRIR + MLIRPass + MLIRTransforms + MLIRTransformUtils + ) diff --git a/mlir/lib/Target/Cpp/TranslateRegistration.cpp b/mlir/lib/Target/Cpp/TranslateRegistration.cpp --- a/mlir/lib/Target/Cpp/TranslateRegistration.cpp +++ b/mlir/lib/Target/Cpp/TranslateRegistration.cpp @@ -10,6 +10,7 @@ #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/EmitC/IR/EmitC.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/BuiltinOps.h" @@ -45,6 +46,7 @@ cf::ControlFlowDialect, emitc::EmitCDialect, func::FuncDialect, + gpu::GPUDialect, math::MathDialect, scf::SCFDialect>(); // clang-format on diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -10,6 +10,7 @@ #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/EmitC/IR/EmitC.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" @@ -563,8 +564,8 @@ return success(); } -static LogicalResult printOperation(CppEmitter &emitter, - func::ReturnOp returnOp) { +static LogicalResult printReturnLikeOperation(CppEmitter &emitter, + Operation &returnOp) { raw_ostream &os = emitter.ostream(); os << "return"; switch (returnOp.getNumOperands()) { @@ -575,7 +576,7 @@ return success(emitter.hasValueInScope(returnOp.getOperand(0))); default: os << " std::make_tuple("; - if (failed(emitter.emitOperandsAndAttributes(*returnOp.getOperation()))) + if (failed(emitter.emitOperandsAndAttributes(returnOp))) return failure(); os << ")"; return success(); @@ -593,7 +594,23 @@ } static LogicalResult printOperation(CppEmitter &emitter, - func::FuncOp functionOp) { + gpu::GPUModuleOp moduleOp) { + CppEmitter::Scope scope(emitter); + + for (Operation &op : moduleOp) { + if (failed(emitter.emitOperation(op, /*trailingSemicolon=*/false))) + return failure(); + } + return success(); +} + +static LogicalResult printOperation(CppEmitter &emitter, + gpu::ModuleEndOp moduleEndOp) { + return success(); +} + +static LogicalResult printOperation(CppEmitter &emitter, + FunctionOpInterface functionOp) { // We need to declare variables at top if the function has multiple blocks. if (!emitter.shouldDeclareVariablesAtTop() && functionOp.getBlocks().size() > 1) { @@ -603,8 +620,17 @@ CppEmitter::Scope scope(emitter); raw_indented_ostream &os = emitter.ostream(); - if (failed(emitter.emitTypes(functionOp.getLoc(), - functionOp.getFunctionType().getResults()))) + // Emit any function attributes if the (discardable) emitc attribute is + // present. + if (auto func_attr = functionOp->getAttrOfType( + EmitCDialect::getFuncAttrName())) { + for (auto &attr : func_attr.getRawStringData()) { + os << attr << "\n"; + } + } + + if (failed( + emitter.emitTypes(functionOp.getLoc(), functionOp.getResultTypes()))) return failure(); os << " " << functionOp.getName(); @@ -937,8 +963,11 @@ .Case( [&](auto op) { return printOperation(*this, op); }) + // Ops implementing FunctionOpInterface + .Case( + [&](auto op) { return printOperation(*this, op); }) // Func ops. - .Case( + .Case( [&](auto op) { return printOperation(*this, op); }) // SCF ops. .Case( @@ -946,8 +975,15 @@ // Arithmetic ops. .Case( [&](auto op) { return printOperation(*this, op); }) - .Default([&](Operation *) { - return op.emitOpError("unable to find printer for op"); + // GPU ops. + .Case( + [&](auto op) { return printOperation(*this, op); }) + // Others... + .Default([&](Operation *) -> LogicalResult { + if (op.hasTrait()) + return printReturnLikeOperation(*this, op); + else + return op.emitOpError("unable to find printer for op"); }); if (failed(status)) diff --git a/mlir/test/Dialect/EmitC/gpu_func_attr.mlir b/mlir/test/Dialect/EmitC/gpu_func_attr.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/EmitC/gpu_func_attr.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt --emitc-add-gpu-func-attributes %s | FileCheck %s + +module attributes {gpu.container_module} { + gpu.module @kernels { + // CHECK: gpu.func @device_func_1([[ARG0:[^ ]*]]: f32, [[ARG1:[^ ]*]]: !emitc.ptr) attributes {emitc.func_attr = dense<"__device__"> : tensor<1x!emitc.string>} { + gpu.func @device_func_1(%arg0 : f32, %arg1 : !emitc.ptr) { + gpu.return + } + // CHECK: gpu.func @kernel_1([[ARG0:[^ ]*]]: f32, [[ARG1:[^ ]*]]: !emitc.ptr) kernel attributes {emitc.func_attr = dense<["extern \22C\22", "__global__"]> : tensor<2x!emitc.string>} { + gpu.func @kernel_1(%arg0 : f32, %arg1 : !emitc.ptr) kernel attributes {emitc.func_attr = dense<"extern \"C\""> : tensor<1x!emitc.string>} { + gpu.return + } + } +} \ No newline at end of file diff --git a/mlir/test/Target/Cpp/common-cpp.mlir b/mlir/test/Target/Cpp/common-cpp.mlir --- a/mlir/test/Target/Cpp/common-cpp.mlir +++ b/mlir/test/Target/Cpp/common-cpp.mlir @@ -82,8 +82,13 @@ return %2 : !emitc.opaque<"status_t"> } -func.func @apply(%arg0: i32) -> !emitc.ptr { - // CHECK: int32_t* [[V2]] = &[[V1]]; +// Note: FileCheck will interpret [[nodiscard]] as a variable, so we use +// __attribute__ below. +// CHECK: static +// CHECK: __attribute__((warn_unused_result)) +// CHECK: int32_t* apply(int32_t [[V1:[^ ]*]]) { +func.func @apply(%arg0: i32) -> !emitc.ptr attributes {emitc.func_attr = dense<["static", "__attribute__((warn_unused_result))"]> : tensor<2x!emitc.string>} { + // CHECK: int32_t* [[V2:[^ ]*]] = &[[V1]]; %0 = emitc.apply "&"(%arg0) : (i32) -> !emitc.ptr // CHECK: int32_t [[V3]] = *[[V2]]; %1 = emitc.apply "*"(%0) : (!emitc.ptr) -> (i32) diff --git a/mlir/test/Target/Cpp/gpu_ops.mlir b/mlir/test/Target/Cpp/gpu_ops.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Target/Cpp/gpu_ops.mlir @@ -0,0 +1,18 @@ +// RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s + +module attributes {gpu.container_module} { + gpu.module @kernels { + // CHECK: __device__ + // CHECK: void device_func_1(float [[V1:.*]], float* [[V2:.*]]) { + gpu.func @device_func_1(%arg0 : f32, %arg1 : !emitc.ptr) + attributes { emitc.func_attr = dense<["__device__"]> : tensor<1x!emitc.string> } { + gpu.return + } + // CHECK: __global__ + // CHECK: void kernel_1(float [[V1:.*]], float* [[V2:.*]]) { + gpu.func @kernel_1(%arg0 : f32, %arg1 : !emitc.ptr) kernel + attributes { emitc.func_attr = dense<["__global__"]> : tensor<1x!emitc.string> } { + gpu.return + } + } +}