diff --git a/mlir/include/mlir/Dialect/GPU/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/GPU/IR/CMakeLists.txt --- a/mlir/include/mlir/Dialect/GPU/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/GPU/IR/CMakeLists.txt @@ -16,6 +16,11 @@ mlir_tablegen(GPUOpsEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRGPUOpsEnumsGen) +set(LLVM_TARGET_DEFINITIONS CompilationAttrInterfaces.td) +mlir_tablegen(CompilationAttrInterfaces.h.inc -gen-attr-interface-decls) +mlir_tablegen(CompilationAttrInterfaces.cpp.inc -gen-attr-interface-defs) +add_public_tablegen_target(MLIRGPUCompilationAttrInterfacesIncGen) + set(LLVM_TARGET_DEFINITIONS GPUOps.td) mlir_tablegen(GPUOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=gpu) mlir_tablegen(GPUOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=gpu) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td @@ -0,0 +1,116 @@ +//===-- CompilationAttrInterfaces.td - GPU compilation interfaces ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces for GPU target attributes & GPU object manager +// attributes. +// +//===----------------------------------------------------------------------===// + +#ifndef GPU_COMPILATIONATTRINTERFACES +#define GPU_COMPILATIONATTRINTERFACES + +include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// GPU target attribute interface. 
+//===----------------------------------------------------------------------===// + +def GPUTargetAttrInterface : AttrInterface<"TargetAttrInterface"> { + let description = [{ + Interface for GPU target attributes. Attributes implementing this interface + compile GPU modules into binary objects, providing an opaque interface to + hide implementation details. + }]; + let cppNamespace = "::mlir::gpu"; + let methods = [ + InterfaceMethod<[{ + Serializes a GPU module to a string containing a representation of the + module. + + All attributes implementing this interface must implement this method. + If serialization fails then the method should return `std::nullopt`. + + The `options` argument is meant to be used for passing additional + options that are not in the attribute. + }], + "std::optional>", "serializeToObject", + (ins "Operation*":$module, "const gpu::TargetOptions&":$options) + > + ]; +} + +def ImplementsTargetAttrInterface : AttrConstraint< + CPred<"$_self.hasTrait<::mlir::gpu::TargetAttrInterface::Trait>()">, + "Attribute implementing the `TargetAttrInterface` interface." +>; + +def GPUTargetAttr : ConfinedAttr { + let description = [{ + Generic target attribute implementing the `TargetAttrInterface` interface. + }]; +} + +def GPUTargetArrayAttr : + TypedArrayAttrBase; + +def GPUNonEmptyTargetArrayAttr : + ConfinedAttr]>; + +//===----------------------------------------------------------------------===// +// GPU object manager attribute interface. +//===----------------------------------------------------------------------===// + +def GPUObjectManagerAttrInterface : + AttrInterface<"ObjectManagerAttrInterface"> { + let description = [{ + Interface for GPU object manager attributes. Attributes implementing this + interface manage the interaction between GPU objects and host IR. + }]; + let cppNamespace = "::mlir::gpu"; + let methods = [ + InterfaceMethod<[{ + Embeds a GPU object into a host LLVM module. 
The operation expected by + this method must be a GPU BinaryOp. + + All attributes implementing this interface must implement this method. + If the method fails then it must return `failure`. + }], + "LogicalResult", "embedBinary", + (ins "Operation*":$binaryOp, "llvm::IRBuilderBase&":$hostBuilder, + "LLVM::ModuleTranslation&":$hostModuleTranslation) + >, + InterfaceMethod<[{ + Launches a kernel inside a binary. The first argument must be a GPU + LaunchFuncOp, while the second one a GPU BinaryOp. + + All attributes implementing this interface must implement this method. + If the method fails then it must return `failure`. + }], + "LogicalResult", "launchKernel", + (ins "Operation*":$launchFunc, "Operation*":$binaryOp, + "llvm::IRBuilderBase&":$hostBuilder, + "LLVM::ModuleTranslation&":$hostModuleTranslation) + > + ]; +} + +def ImplementsObjectManagerAttrInterface : AttrConstraint< + CPred<"$_self.hasTrait<::mlir::gpu::ObjectManagerAttrInterface::Trait>()">, + "Attribute implementing the `ObjectManagerAttrInterface` interface." +>; + +def GPUObjectManagerAttr : + ConfinedAttr { + let description = [{ + Generic compilation attribute implementing the `ObjectManagerAttrInterface` + interface. + }]; +} + +#endif // GPU_COMPILATIONATTRINTERFACES diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUCompilationAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUCompilationAttr.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUCompilationAttr.td @@ -0,0 +1,182 @@ +//===-- GPUCompilationAttr.td - GPU compilation attributes --*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the GPU NVPTX & AMDGPU target attributes. 
+// +//===----------------------------------------------------------------------===// + +#ifndef GPU_COMPILATIONATTR +#define GPU_COMPILATIONATTR + +include "mlir/Dialect/GPU/IR/GPUBase.td" +include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" + +//===----------------------------------------------------------------------===// +// GPU NVPTX target attribute. +//===----------------------------------------------------------------------===// + +def GPU_NVPTXTargetAttr : GPU_Attr<"NVPTXTarget", "nvptx", [ + DeclareAttrInterfaceMethods + ]> { + let description = [{ + NVPTX target attribute for controlling compilation of NVIDIA targets. All + parameters decay into default values if not present. + + Examples: + + 1. Target with default values. + ``` + gpu.module @mymodule [#gpu.nvptx] attributes {...} { + ... + } + ``` + + 2. Target with `sm_90` chip and fast math. + ``` + gpu.module @mymodule [#gpu.nvptx] { + ... + } + ``` + }]; + let parameters = (ins + DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, + StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple, + StringRefParameter<"Target chip.", "\"sm_50\"">:$chip, + StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features, + OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, + OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link + ); + let assemblyFormat = [{ + (`<` struct($O, $triple, $chip, $features, $flags)^ `>`)? 
+ }]; + let builders = [ + AttrBuilder<(ins CArg<"int", "2">:$optLevel, + CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple, + CArg<"StringRef", "\"sm_50\"">:$chip, + CArg<"StringRef", "\"+ptx60\"">:$features, + CArg<"DictionaryAttr", "nullptr">:$targetFlags, + CArg<"ArrayAttr", "nullptr">:$linkFiles), [{ + return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles); + }]> + ]; + let skipDefaultBuilders = 1; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + bool hasFlag(StringRef flag) const; + bool getFastMath() const; + bool getFtz() const; + }]; + let extraClassDefinition = [{ + bool $cppClass::hasFlag(StringRef flag) const { + if (DictionaryAttr flags = getFlags()) + return flags.get(flag) != nullptr; + return false; + } + bool $cppClass::getFastMath() const { + return hasFlag("fast"); + } + bool $cppClass::getFtz() const { + return hasFlag("ftz"); + } + }]; +} + +//===----------------------------------------------------------------------===// +// GPU AMDGPU target attribute. +//===----------------------------------------------------------------------===// + +def GPU_AMDGPUTargetAttr : GPU_Attr<"AMDGPUTarget", "amdgpu", [ + DeclareAttrInterfaceMethods + ]> { + let description = [{ + AMDGPU target attribute for controlling compilation of AMDGPU targets. All + parameters decay into default values if not present. + + Examples: + + 1. Target with default values. + ``` + gpu.module @mymodule [#gpu.amdgpu] attributes {...} { + ... + } + ``` + + 2. Target with `gfx90a` chip and fast math. + ``` + gpu.module @mymodule [#gpu.amdgpu] { + ... 
+ } + ``` + }]; + let parameters = (ins + DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, + StringRefParameter<"Target triple.", "\"amdgcn-amd-amdhsa\"">:$triple, + StringRefParameter<"Target chip.", "\"gfx900\"">:$chip, + StringRefParameter<"Target chip features.", "\"\"">:$features, + StringRefParameter<"ABI version.", "\"500\"">:$abi, + OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, + OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link + ); + let assemblyFormat = [{ + (`<` struct($O, $triple, $chip, $features, $abi, $flags)^ `>`)? + }]; + let builders = [ + AttrBuilder<(ins CArg<"int", "2">:$optLevel, + CArg<"StringRef", "\"amdgcn-amd-amdhsa\"">:$triple, + CArg<"StringRef", "\"gfx900\"">:$chip, + CArg<"StringRef", "\"\"">:$features, + CArg<"StringRef", "\"500\"">:$abiVersion, + CArg<"DictionaryAttr", "nullptr">:$targetFlags, + CArg<"ArrayAttr", "nullptr">:$linkFiles), [{ + return Base::get($_ctxt, optLevel, triple, chip, features, abiVersion, + targetFlags, linkFiles); + }]> + ]; + let skipDefaultBuilders = 1; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + bool hasFlag(StringRef flag) const; + bool getWave64() const; + bool getFastMath() const; + bool getDaz() const; + bool getFiniteOnly() const; + bool getUnsafeMath() const; + bool getCorrectSqrt() const; + }]; + let extraClassDefinition = [{ + bool $cppClass::hasFlag(StringRef flag) const { + if (DictionaryAttr flags = getFlags()) + return flags.get(flag) != nullptr; + return false; + } + bool $cppClass::getWave64() const { + return hasFlag("wave64") || !hasFlag("no_wave64"); + } + bool $cppClass::getFastMath() const { + return hasFlag("fast"); + } + bool $cppClass::getDaz() const { + return hasFlag("daz"); + } + bool $cppClass::getFiniteOnly() const { + return hasFlag("finite_only"); + } + bool $cppClass::getUnsafeMath() const { + return hasFlag("unsafe_math"); + } + bool $cppClass::getCorrectSqrt() const { + return 
!hasFlag("unsafe_sqrt"); + } + }]; +} + +#endif // GPU_COMPILATIONATTR diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h @@ -28,7 +28,14 @@ #include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/ADT/STLExtras.h" +namespace llvm { +class IRBuilderBase; +} + namespace mlir { +namespace LLVM { +class ModuleTranslation; +} namespace gpu { /// Utility class for the GPU dialect to represent triples of `Value`s @@ -180,9 +187,45 @@ using SparseDnTensorHandleType = SparseHandleType; using SparseSpMatHandleType = SparseHandleType; +/// This class serves as an opaque interface for passing options to the +/// `TargetAttrInterface` methods. Users of this class must implement the +/// `classof` method as well as using the macros `MLIR_*_EXPLICIT_TYPE_ID` to +/// ensure type safety. +class TargetOptions { +public: + /// Constructor initializing the toolkit path and the list of bitcode files. + TargetOptions(StringRef toolkitPath = {}, + ArrayRef bitcodeFiles = {}); + + /// Returns the typeID. + TypeID getTypeID() const; + + /// Returns the toolkit path. + StringRef getToolkitPath() const; + + /// Returns the bitcode files to link to. + ArrayRef getBitcodeFiles() const; + +protected: + /// Derived classes must use this constructor to initialize `typeID` to the + /// appropriate value, i.e., `TargetOptions(TypeID::get())`. + TargetOptions(TypeID typeID, StringRef toolkitPath = {}, + ArrayRef bitcodeFiles = {}); + + /// Path to the target toolkit. + StringRef toolkitPath; + + /// List of files to link with the LLVM module. 
+ ArrayRef bitcodeFiles; + +private: + TypeID typeID; +}; } // namespace gpu } // namespace mlir +MLIR_DECLARE_EXPLICIT_TYPE_ID(::mlir::gpu::TargetOptions) + #include "mlir/Dialect/GPU/IR/GPUOpsEnums.h.inc" #include "mlir/Dialect/GPU/IR/GPUOpsDialect.h.inc" @@ -191,6 +234,8 @@ #include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h" +#include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.h.inc" + #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/GPU/IR/GPUOpsAttributes.h.inc" diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -15,6 +15,8 @@ include "mlir/Dialect/DLTI/DLTIBase.td" include "mlir/Dialect/GPU/IR/GPUBase.td" +include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" +include "mlir/Dialect/GPU/IR/GPUCompilationAttr.td" include "mlir/Dialect/GPU/IR/ParallelLoopMapperAttr.td" include "mlir/Dialect/GPU/TransformOps/GPUDeviceMappingAttr.td" include "mlir/IR/EnumAttr.td" @@ -429,14 +431,17 @@ let hasVerifier = 1; } +def LaunchIndx : AnyTypeOf<[Index, I32, I64]>; + def GPU_LaunchFuncOp : GPU_Op<"launch_func", [GPU_AsyncOpInterface, AttrSizedOperandSegments]>, Arguments<(ins Variadic:$asyncDependencies, SymbolRefAttr:$kernel, - Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ, - Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ, + LaunchIndx:$gridSizeX, LaunchIndx:$gridSizeY, LaunchIndx:$gridSizeZ, + LaunchIndx:$blockSizeX, LaunchIndx:$blockSizeY, LaunchIndx:$blockSizeZ, Optional:$dynamicSharedMemorySize, - Variadic:$kernelOperands)>, + Variadic:$kernelOperands, + Optional:$asyncObject)>, Results<(outs Optional:$asyncToken)> { let summary = "Launches a function as a GPU kernel"; @@ -527,7 +532,11 @@ "KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize, "ValueRange":$kernelOperands, CArg<"Type", "nullptr">:$asyncTokenType, - CArg<"ValueRange", "{}">:$asyncDependencies)> + CArg<"ValueRange", 
"{}">:$asyncDependencies)>, + OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize, + "KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize, + "ValueRange":$kernelOperands, + CArg<"Value", "nullptr">:$asyncObject)> ]; let extraClassDeclaration = [{ @@ -557,9 +566,10 @@ let assemblyFormat = [{ custom(type($asyncToken), $asyncDependencies) + (`<` $asyncObject^ type($asyncObject) `>`)? $kernel - `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)` - `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)` + `blocks` `in` custom($gridSizeX, type($gridSizeX), $gridSizeY, type($gridSizeY), $gridSizeZ, type($gridSizeZ)) + `threads` `in` custom($blockSizeX, type($blockSizeX), $blockSizeY, type($blockSizeY), $blockSizeZ, type($blockSizeZ)) (`dynamic_shared_memory_size` $dynamicSharedMemorySize^)? custom($kernelOperands, type($kernelOperands)) attr-dict }]; @@ -998,10 +1008,10 @@ } def GPU_GPUModuleOp : GPU_Op<"module", [ - DataLayoutOpInterface, HasDefaultDLTIDataLayout, IsolatedFromAbove, - SymbolTable, Symbol, - SingleBlockImplicitTerminator<"ModuleEndOp"> -]> { + DataLayoutOpInterface, HasDefaultDLTIDataLayout, IsolatedFromAbove, + SymbolTable, Symbol, + SingleBlockImplicitTerminator<"ModuleEndOp"> + ]>, Arguments<(ins OptionalAttr:$targets)> { let summary = "A top level compilation unit containing code to be run on a GPU."; let description = [{ GPU module contains code that is intended to be run on a GPU. A host device @@ -1019,15 +1029,21 @@ or not intended to be run on the separate device. ``` - gpu.module @symbol_name { + gpu.module @symbol_name { + gpu.func {} + ... + gpu.module_end + } + gpu.module @symbol_name2 [#gpu.amdgpu] { gpu.func {} ... 
gpu.module_end } - ``` }]; - let builders = [OpBuilder<(ins "StringRef":$name)>]; + let builders = [ + OpBuilder<(ins "StringRef":$name, CArg<"ArrayAttr", "{}">:$targets)> + ]; let regions = (region SizedRegion<1>:$bodyRegion); let hasCustomAssemblyFormat = 1; @@ -1047,6 +1063,54 @@ let assemblyFormat = "attr-dict"; } +def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { + let description = [{ + A GPU object attribute pairs a GPU target with a binary string, + encapsulating the information of how the object was generated with the + object itself. + + The target attribute must implement the `TargetAttrInterface` interface. + + ``` + #gpu.object<#gpu.nvptx, "..."> + ``` + }]; + let parameters = (ins "Attribute":$target, "StringAttr":$object); + let assemblyFormat = [{`<` $target `,` $object `>`}]; + let genVerifyDecl = 1; +} + +def GPUObjectArrayAttr : + TypedArrayAttrBase; + +def GPU_BinaryOp : GPU_Op<"binary", [Symbol, GlobalSymbol]>, + Arguments<(ins SymbolNameAttr:$sym_name, + GPUObjectManagerAttr:$objectManager, + ConfinedAttr]>:$objects)> { + let summary = "An op for storing serialized GPU binary objects."; + let description = [{ + GPU binaries provide a semantic mechanism for storing GPU objects, + e.g. the result of compiling a GPU module to an object file. + + This operation has 3 arguments: + - The name of the binary. + - An attribute implementing the `ObjectManagerAttrInterface` interface. + - An array of GPU object attributes. 
+ + ``` + gpu.binary @myobject [#gpu.object<...>, #gpu.object<...>] + ``` + }]; + let builders = [ + OpBuilder<(ins "StringRef":$name, "Attribute":$objectManager, + "ArrayAttr":$objects)> + ]; + let skipDefaultBuilders = 1; + let assemblyFormat = [{ + $sym_name custom($objectManager) attr-dict-with-keyword $objects + }]; +} + def GPU_HostRegisterOp : GPU_Op<"host_register">, Arguments<(ins AnyUnrankedMemRef:$value)> { let summary = "Registers a memref for access from device."; diff --git a/mlir/include/mlir/ExecutionEngine/ModuleToObject.h b/mlir/include/mlir/ExecutionEngine/ModuleToObject.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/ExecutionEngine/ModuleToObject.h @@ -0,0 +1,117 @@ +//===- ModuleToObject.h - Module to object base class -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the base class for transforming Operations into binary +// objects. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_EXECUTIONENGINE_MODULETOOBJECT_H +#define MLIR_EXECUTIONENGINE_MODULETOOBJECT_H + +#include "mlir/IR/Operation.h" +#include "llvm/IR/Module.h" + +namespace llvm { +class TargetMachine; +} // namespace llvm + +namespace mlir { +namespace LLVM { +class ModuleTranslation; +} +/// Utility base class for transforming Operations into binary objects, by +/// default it returns the serialized bitcode for the module. +class ModuleToObject { +public: + ModuleToObject(Operation &module, StringRef triple, StringRef chip, + StringRef features = {}, int optLevel = 3); + virtual ~ModuleToObject() = default; + + /// Returns the gpu.module being serialized. 
+ Operation &getOperation(); + + /// Runs the serialization pipeline, returning `std::nullopt` on error. + virtual std::optional> run(); + +protected: + // Hooks to be implemented by derived classes. + + /// Hook for loading bitcode files, returns std::nullopt on failure. + virtual std::optional>> + loadBitcodeFiles(llvm::LLVMContext &context, llvm::Module &module) { + return SmallVector>(); + } + + /// Hook for performing additional actions on a loaded bitcode file. + virtual void handleBitcodeFile(llvm::Module &module, + llvm::TargetMachine &targetMachine) {} + + /// Hook for performing additional actions on the llvmModule pre linking. + virtual void handleModulePreLink(llvm::Module &module, + llvm::TargetMachine &targetMachine) {} + + /// Hook for performing additional actions on the llvmModule post linking. + virtual void handleModulePostLink(llvm::Module &module, + llvm::TargetMachine &targetMachine) {} + + /// Serializes the LLVM IR bitcode to an object file, by default it serializes + /// to LLVM bitcode. + virtual std::optional> + moduleToObject(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine); + +protected: + /// Create the target machine based on the target triple and chip. + std::unique_ptr createTargetMachine(); + + /// Loads a bitcode file from path. + std::unique_ptr loadBitcodeFile(llvm::LLVMContext &context, + StringRef path); + + /// Loads multiple bitcode files. + LogicalResult loadBitcodeFilesFromList( + llvm::LLVMContext &context, ArrayRef fileList, + SmallVector> &llvmModules, + bool failureOnError = true); + + /// Translates the gpu.module to LLVM IR. + std::unique_ptr + translateToLLVMIR(llvm::LLVMContext &llvmContext); + + /// Link the llvmModule to other bitcode file. + LogicalResult linkFiles(llvm::Module &module, + SmallVector> &&libs); + + /// Optimize the module. 
+ LogicalResult optimizeModule(llvm::Module &module, + llvm::TargetMachine &targetMachine, int optL); + + /// Utility function for translating to ISA, returns `std::nullopt` on + /// failure. + static std::optional + translateToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine); + +protected: + /// Module to transform to a binary object. + Operation &module; + + /// Target triple. + StringRef triple; + + /// Target chip. + StringRef chip; + + /// Target features. + StringRef features; + + /// Optimization level. + int optLevel; +}; +} // namespace mlir + +#endif // MLIR_EXECUTIONENGINE_MODULETOOBJECT_H diff --git a/mlir/include/mlir/IR/AttrTypeBase.td b/mlir/include/mlir/IR/AttrTypeBase.td --- a/mlir/include/mlir/IR/AttrTypeBase.td +++ b/mlir/include/mlir/IR/AttrTypeBase.td @@ -342,11 +342,12 @@ } // For StringRefs, which require allocation. -class StringRefParameter : +class StringRefParameter : AttrOrTypeParameter<"::llvm::StringRef", desc> { let allocator = [{$_dst = $_allocator.copyInto($_self);}]; let printer = [{$_printer << '"' << $_self << '"';}]; let cppStorageType = "std::string"; + let defaultValue = value; } // For APFloats, which require comparison. diff --git a/mlir/include/mlir/IR/SymbolInterfaces.td b/mlir/include/mlir/IR/SymbolInterfaces.td --- a/mlir/include/mlir/IR/SymbolInterfaces.td +++ b/mlir/include/mlir/IR/SymbolInterfaces.td @@ -219,6 +219,9 @@ // Symbol Traits //===----------------------------------------------------------------------===// +// Op defines a global symbol. +def GlobalSymbol : NativeOpTrait<"GlobalSymbol", [Symbol]>; + // Op defines a symbol table. def SymbolTable : NativeOpTrait<"SymbolTable">; diff --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h --- a/mlir/include/mlir/IR/SymbolTable.h +++ b/mlir/include/mlir/IR/SymbolTable.h @@ -448,4 +448,22 @@ /// Include the generated symbol interfaces. 
#include "mlir/IR/SymbolInterfaces.h.inc" +namespace mlir { +namespace OpTrait { +/// A trait used to indicate that a symbol has a global scope. Operations using +/// this trait must also adhere to the constraints defined by the `Symbol` +/// trait. +template +class GlobalSymbol : public TraitBase { + static LogicalResult verifyTrait(Operation *op) { + static_assert(ConcreteType::template hasTrait(), + "expected operation must have zero results"); + static_assert(ConcreteType::template hasTrait(), + "expected operation must inherit the `Symbol` trait"); + return success(); + } +}; +} // namespace OpTrait +} // namespace mlir + #endif // MLIR_IR_SYMBOLTABLE_H diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -32,6 +32,7 @@ MLIRGPUOpsAttributesIncGen MLIRGPUOpsEnumsGen MLIRGPUOpInterfacesIncGen + MLIRGPUCompilationAttrInterfacesIncGen LINK_LIBS PUBLIC MLIRArithDialect @@ -41,6 +42,33 @@ MLIRMemRefDialect MLIRSideEffectInterfaces MLIRSupport + + PRIVATE + MLIRGPUTargets + ) + +add_mlir_dialect_library(MLIRGPUTargets + Targets/AMDGPUTarget.cpp + Targets/NVPTXTarget.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU + + LINK_COMPONENTS + Core + MC + Target + ${NVPTX_LIBS} + ${AMDGPU_LIBS} + + LINK_LIBS PUBLIC + MLIRIR + MLIRExecutionEngineUtils + MLIRSupport + MLIRTargetLLVMIRExport + + PRIVATE + MLIRGPUDialect ) add_mlir_dialect_library(MLIRGPUTransforms @@ -128,6 +156,35 @@ ${CUDA_DRIVER_LIBRARY} ) + # Find the CUDA toolkit. + if (NOT DEFINED CUDAToolkit_ROOT) + find_package(CUDAToolkit) + get_filename_component(CUDAToolkit_ROOT ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) + endif() + message(VERBOSE "MLIR Default CUDA toolkit path: ${CUDAToolkit_ROOT}") + + # Enable the gpu to cubin target. 
+ target_compile_definitions(obj.MLIRGPUTargets + PRIVATE + MLIR_GPU_NVPTX_TARGET_ENABLED=1 + __DEFAULT_CUDATOOLKIT_PATH__="${CUDAToolkit_ROOT}" + ) + # Enable the gpu to cubin target. + target_compile_definitions(obj.MLIRGPUTransforms + PRIVATE + MLIR_GPU_NVPTX_TARGET_ENABLED=1 + ) + + # Add CUDA headers includes and the libcuda.so library. + target_include_directories(obj.MLIRGPUTargets + PRIVATE + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ) + target_link_libraries(MLIRGPUTargets + PRIVATE + ${CUDA_DRIVER_LIBRARY} + ) + endif() if(MLIR_ENABLE_ROCM_CONVERSIONS) @@ -136,13 +193,32 @@ "Building mlir with ROCm support requires the AMDGPU backend") endif() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + if (DEFINED ROCM_PATH) + set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") + elseif(DEFINED ENV{ROCM_PATH}) + set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") + else() + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + endif() + message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") + target_compile_definitions(obj.MLIRGPUTransforms PRIVATE __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" MLIR_GPU_TO_HSACO_PASS_ENABLE=1 ) + # Enable the gpu to amdgpu target. + target_compile_definitions(obj.MLIRGPUTargets + PRIVATE + MLIR_GPU_AMDGPU_TARGET_ENABLED=1 + __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" + ) + target_compile_definitions(obj.MLIRGPUTransforms + PRIVATE + MLIR_GPU_AMDGPU_TARGET_ENABLED=1 + ) + target_link_libraries(MLIRGPUTransforms PRIVATE MLIRROCDLToLLVMIRTranslation diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -275,11 +275,21 @@ return success(); // Check that `launch_func` refers to a well-formed GPU kernel module. 
- StringAttr kernelModuleName = launchOp.getKernelModuleName(); - auto kernelModule = module.lookupSymbol(kernelModuleName); + StringAttr kernelContainerName = launchOp.getKernelModuleName(); + Operation *kernelContainer = module.lookupSymbol(kernelContainerName); + if (!kernelContainer) + return launchOp.emitOpError() + << "kernel container '" << kernelContainerName.getValue() + << "' is undefined"; + + // If the container is a GPU binary op return success. + if (isa(kernelContainer)) + return success(); + + auto kernelModule = dyn_cast(kernelContainer); if (!kernelModule) return launchOp.emitOpError() - << "kernel module '" << kernelModuleName.getValue() + << "kernel module '" << kernelContainerName.getValue() << "' is undefined"; // Check that `launch_func` refers to a well-formed kernel function. @@ -937,10 +947,36 @@ SymbolRefAttr::get(kernelModule.getNameAttr(), {SymbolRefAttr::get(kernelFunc.getNameAttr())}); result.addAttribute(getKernelAttrName(result.name), kernelSymbol); - SmallVector segmentSizes(9, 1); + SmallVector segmentSizes(10, 1); segmentSizes.front() = asyncDependencies.size(); - segmentSizes[segmentSizes.size() - 2] = dynamicSharedMemorySize ? 1 : 0; - segmentSizes.back() = static_cast(kernelOperands.size()); + segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0; + segmentSizes[segmentSizes.size() - 2] = + static_cast(kernelOperands.size()); + segmentSizes.back() = 0; + result.addAttribute(getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr(segmentSizes)); +} + +void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, + SymbolRefAttr kernel, KernelDim3 gridSize, + KernelDim3 getBlockSize, Value dynamicSharedMemorySize, + ValueRange kernelOperands, Value asyncObject) { + // Add grid and block sizes as op operands, followed by the data operands. 
+ result.addOperands({gridSize.x, gridSize.y, gridSize.z, getBlockSize.x, + getBlockSize.y, getBlockSize.z}); + if (dynamicSharedMemorySize) + result.addOperands(dynamicSharedMemorySize); + result.addOperands(kernelOperands); + if (asyncObject) + result.addOperands(asyncObject); + result.addAttribute(getKernelAttrName(result.name), kernel); + SmallVector segmentSizes(10, 1); + segmentSizes.front() = 0; + segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0; + segmentSizes[segmentSizes.size() - 2] = + static_cast(kernelOperands.size()); + + segmentSizes.back() = asyncObject ? 1 : 0; result.addAttribute(getOperandSegmentSizeAttr(), builder.getDenseI32ArrayAttr(segmentSizes)); } @@ -982,9 +1018,51 @@ GPUDialect::getContainerModuleAttrName() + "' attribute"); + KernelDim3 grid = getGridSizeOperandValues(); + KernelDim3 block = getBlockSizeOperandValues(); + if (grid.x.getType() != grid.y.getType() || + grid.x.getType() != grid.z.getType() || + grid.x.getType() != block.x.getType() || + grid.x.getType() != block.y.getType() || + grid.x.getType() != block.z.getType()) + return emitOpError( + "expected the grid and block sizes all having the same type"); + return success(); +} + +static ParseResult +parseDim3(OpAsmParser &parser, OpAsmParser::UnresolvedOperand &sizeX, + Type &sizeXTy, OpAsmParser::UnresolvedOperand &sizeY, Type &sizeYTy, + OpAsmParser::UnresolvedOperand &sizeZ, Type &sizeZTy) { + if (parser.parseLParen() || parser.parseOperand(sizeX) || + parser.parseComma() || parser.parseOperand(sizeY) || + parser.parseComma() || parser.parseOperand(sizeZ) || parser.parseRParen()) + return failure(); + SmallVector types; + if (failed(parser.parseOptionalColonTypeList(types))) + return failure(); + if (types.size()) { + sizeXTy = types[0]; + sizeYTy = types[0]; + sizeZTy = types[0]; + } else { + types.push_back(IndexType::get(parser.getContext())); + sizeXTy = types[0]; + sizeYTy = types[0]; + sizeZTy = types[0]; + } return success(); } +static void 
printDim3(OpAsmPrinter &printer, Operation *op, Value sizeX, + Type sizeXTy, Value sizeY, Type sizeYTy, Value sizeZ, + Type sizeZTy) { + printer << '(' << sizeX << ", " << sizeY << ", " << sizeZ << ')'; + IndexType indexType = IndexType::get(op->getContext()); + if (indexType != sizeXTy) + printer << " : " << sizeXTy; +} + static ParseResult parseLaunchFuncOperands( OpAsmParser &parser, SmallVectorImpl &argNames, @@ -1456,18 +1534,35 @@ //===----------------------------------------------------------------------===// void GPUModuleOp::build(OpBuilder &builder, OperationState &result, - StringRef name) { + StringRef name, ArrayAttr targets) { ensureTerminator(*result.addRegion(), builder, result.location); result.attributes.push_back(builder.getNamedAttr( ::mlir::SymbolTable::getSymbolAttrName(), builder.getStringAttr(name))); + + if (targets) + result.getOrAddProperties().targets = targets; } ParseResult GPUModuleOp::parse(OpAsmParser &parser, OperationState &result) { StringAttr nameAttr; + ArrayAttr targetsAttr; + if (parser.parseSymbolName(nameAttr, mlir::SymbolTable::getSymbolAttrName(), - result.attributes) || - // If module attributes are present, parse them. - parser.parseOptionalAttrDictWithKeyword(result.attributes)) + result.attributes)) + return failure(); + + // Parse the optional array of target attributes. + OptionalParseResult targetsAttrResult = + parser.parseOptionalAttribute(targetsAttr, Type{}); + if (targetsAttrResult.has_value()) { + if (failed(*targetsAttrResult)) { + return failure(); + } + result.getOrAddProperties().targets = targetsAttr; + } + + // If module attributes are present, parse them. + if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) return failure(); // Parse the module body. 
@@ -1483,13 +1578,56 @@ void GPUModuleOp::print(OpAsmPrinter &p) { p << ' '; p.printSymbolName(getName()); - p.printOptionalAttrDictWithKeyword((*this)->getAttrs(), - {mlir::SymbolTable::getSymbolAttrName()}); + + if (Attribute attr = getTargetsAttr()) { + p << ' '; + p.printAttribute(attr); + p << ' '; + } + + p.printOptionalAttrDictWithKeyword( + (*this)->getAttrs(), + {mlir::SymbolTable::getSymbolAttrName(), getTargetsAttrName()}); p << ' '; p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/false); } +//===----------------------------------------------------------------------===// +// GPUBinaryOp +//===----------------------------------------------------------------------===// + +LogicalResult +ObjectAttr::verify(function_ref emitError, + Attribute target, StringAttr object) { + if (target && target.hasTrait()) + return success(); + emitError() << "The target parameter must implement `TargetAttrInterface`."; + return failure(); +} + +void BinaryOp::build(OpBuilder &builder, OperationState &result, StringRef name, + Attribute manager, ArrayAttr objects) { + auto &properties = result.getOrAddProperties(); + result.attributes.push_back(builder.getNamedAttr( + SymbolTable::getSymbolAttrName(), builder.getStringAttr(name))); + properties.objects = objects; + properties.objectManager = manager; +} + +static ParseResult parseObjectManager(OpAsmParser &parser, + Attribute &objectManager) { + if (parser.parseAttribute(objectManager)) + return failure(); + return success(); +} + +static void printObjectManager(OpAsmPrinter &printer, Operation *op, + Attribute objectManager) { + if (objectManager) + printer << '<' << objectManager << '>'; +} + //===----------------------------------------------------------------------===// // GPUMemcpyOp //===----------------------------------------------------------------------===// @@ -1772,6 +1910,28 @@ results.add(context); } 
+//===----------------------------------------------------------------------===// +// GPU Compilation options +//===----------------------------------------------------------------------===// + +TargetOptions::TargetOptions(StringRef toolkitPath, + ArrayRef bitcodeFiles) + : TargetOptions(TypeID::get(), toolkitPath, bitcodeFiles) {} + +TargetOptions::TargetOptions(TypeID typeID, StringRef toolkitPath, + ArrayRef bitcodeFiles) + : toolkitPath(toolkitPath), bitcodeFiles(bitcodeFiles), typeID(typeID) {} + +TypeID TargetOptions::getTypeID() const { return typeID; } + +StringRef TargetOptions::getToolkitPath() const { return toolkitPath; } + +ArrayRef TargetOptions::getBitcodeFiles() const { + return bitcodeFiles; +} + +MLIR_DEFINE_EXPLICIT_TYPE_ID(::mlir::gpu::TargetOptions) + #include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc" #include "mlir/Dialect/GPU/IR/GPUOpsEnums.cpp.inc" @@ -1780,3 +1940,5 @@ #define GET_OP_CLASSES #include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc" + +#include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.cpp.inc" diff --git a/mlir/lib/Dialect/GPU/Targets/AMDGPUTarget.cpp b/mlir/lib/Dialect/GPU/Targets/AMDGPUTarget.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/GPU/Targets/AMDGPUTarget.cpp @@ -0,0 +1,406 @@ +//===- AMDGPUTarget.cpp - MLIR GPU Dialect AMDGPU target attribute --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files implements the AMDGPU target attribute. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" + +using namespace mlir; +using namespace mlir::gpu; + +#ifdef MLIR_GPU_AMDGPU_TARGET_ENABLED +#include "mlir/ExecutionEngine/ModuleToObject.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" + +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/TargetParser/TargetParser.h" + +#ifndef __DEFAULT_ROCM_PATH__ +#define __DEFAULT_ROCM_PATH__ "" +#endif + +#define DEBUG_TYPE "serialize-to-object" + +namespace { +struct InitTarget { + InitTarget() { + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmParser(); + LLVMInitializeAMDGPUAsmPrinter(); + } +}; + +class SerializeToHSA : public ModuleToObject { +public: + SerializeToHSA(Operation &module, AMDGPUTargetAttr target, + TargetOptions targetOptions = {}); + + // Init the target. + static void init(); + + // Get the paths of ROCm device libraries. Function adapted from: + // https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChains/AMDGPU.cpp + void getCommonBitcodeLibs(llvm::SmallVector &libs, + SmallVector &libPath, + StringRef isaVersion, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, + bool correctSqrt, StringRef abiVer); + + // Removes unnecessary metadata from the loaded bitcode files. + void handleBitcodeFile(llvm::Module &module, + llvm::TargetMachine &targetMachine) override; + // Assembles the object. + std::optional> assembleIsa(StringRef isa); + + // Create the HSACO object. 
+ std::optional> createHsaco(SmallVector &&ptx); + + std::optional>> + loadBitcodeFiles(llvm::LLVMContext &context, llvm::Module &module) override; + + std::optional> + moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) override; + +private: + AMDGPUTargetAttr target; + StringRef toolkitPath; + SmallVector fileList; +}; +} // namespace + +SerializeToHSA::SerializeToHSA(Operation &module, AMDGPUTargetAttr target, + TargetOptions targetOptions) + : ModuleToObject(module, target.getTriple(), target.getChip(), + target.getFeatures(), target.getO()), + target(target), toolkitPath(targetOptions.getToolkitPath()), + fileList(targetOptions.getBitcodeFiles()) { + if (toolkitPath.empty()) + toolkitPath = __DEFAULT_ROCM_PATH__; + + if (ArrayAttr files = target.getLink()) + for (Attribute attr : files.getValue()) + if (auto file = dyn_cast(attr)) + fileList.push_back(file.str()); +} + +void SerializeToHSA::init() { static InitTarget target = InitTarget(); } + +void SerializeToHSA::getCommonBitcodeLibs(llvm::SmallVector &libs, + SmallVector &libPath, + StringRef isaVersion, bool wave64, + bool daz, bool finiteOnly, + bool unsafeMath, bool fastMath, + bool correctSqrt, StringRef abiVer) { + auto addLib = [&](StringRef path) { + if (!llvm::sys::fs::is_regular_file(path)) { + getOperation().emitRemark() << "Bitcode library path: " << path + << " does not exist or is not a file.\n"; + return; + } + libs.push_back(path.str()); + }; + auto optLib = [](StringRef name, bool on) -> Twine { + return name + (on ? "_on" : "_off"); + }; + auto getLibPath = [&libPath](Twine lib) { + auto baseSize = libPath.size(); + llvm::sys::path::append(libPath, lib + ".bc"); + std::string path(StringRef(libPath.data(), libPath.size()).str()); + libPath.truncate(baseSize); + return path; + }; + + // Add ROCm device libraries. 
+ addLib(getLibPath("ocml")); + addLib(getLibPath("ockl")); + addLib(getLibPath(optLib("oclc_daz_opt", daz))); + addLib(getLibPath(optLib("oclc_unsafe_math", unsafeMath || fastMath))); + addLib(getLibPath(optLib("oclc_finite_only", finiteOnly || fastMath))); + addLib(getLibPath(optLib("oclc_correctly_rounded_sqrt", correctSqrt))); + addLib(getLibPath(optLib("oclc_wavefrontsize64", wave64))); + addLib(getLibPath("oclc_isa_version_" + isaVersion)); + if (abiVer.size()) + addLib(getLibPath("oclc_abi_version_" + abiVer)); +} + +std::optional>> +SerializeToHSA::loadBitcodeFiles(llvm::LLVMContext &context, + llvm::Module &module) { + // Try loading device libraries from the ROCm toolkit installation. + StringRef pathRef = toolkitPath; + if (pathRef.size()) { + SmallVector path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory."; + return std::nullopt; + } + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + getCommonBitcodeLibs(fileList, path, isaVersion, target.getWave64(), + target.getDaz(), target.getFiniteOnly(), + target.getUnsafeMath(), target.getFastMath(), + target.getCorrectSqrt(), target.getAbi()); + } + + SmallVector> bcFiles; + if (failed(loadBitcodeFilesFromList(context, fileList, bcFiles, true))) + return std::nullopt; + return bcFiles; +} + +void SerializeToHSA::handleBitcodeFile(llvm::Module &module, + llvm::TargetMachine &targetMachine) { + // Some ROCM builds don't strip this like they should + if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) + module.eraseNamedMetadata(openclVersion); + // Stop spamming us with clang version numbers + if (auto *ident = 
module.getNamedMetadata("llvm.ident")) + module.eraseNamedMetadata(ident); +} + +//===----------------------------------------------------------------------===// +// AMDGPU pipeline methods. +//===----------------------------------------------------------------------===// +#include "mlir/Support/FileUtilities.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Program.h" + +std::optional> SerializeToHSA::assembleIsa(StringRef isa) { + auto loc = getOperation().getLoc(); + + StringRef targetTriple = this->triple; + + SmallVector result; + llvm::raw_svector_ostream os(result); + + llvm::Triple triple(llvm::Triple::normalize(targetTriple)); + std::string error; + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple.normalize(), error); + if (!target) { + emitError(loc, Twine("failed to lookup target: ") + error); + return std::nullopt; + } + + llvm::SourceMgr srcMgr; + srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); + + const llvm::MCTargetOptions mcOptions; + std::unique_ptr mri( + target->createMCRegInfo(targetTriple)); + std::unique_ptr mai( + target->createMCAsmInfo(*mri, targetTriple, mcOptions)); + mai->setRelaxELFRelocations(true); + std::unique_ptr sti( + target->createMCSubtargetInfo(targetTriple, chip, features)); + + llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, + &mcOptions); + std::unique_ptr mofi(target->createMCObjectFileInfo( + ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); + ctx.setObjectFileInfo(mofi.get()); + + SmallString<128> cwd; 
+ if (!llvm::sys::fs::current_path(cwd)) + ctx.setCompilationDir(cwd); + + std::unique_ptr mcStreamer; + std::unique_ptr mcii(target->createMCInstrInfo()); + + llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); + llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); + mcStreamer.reset(target->createMCObjectStreamer( + triple, ctx, std::unique_ptr(mab), + mab->createObjectWriter(os), std::unique_ptr(ce), + *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false)); + mcStreamer->setUseAssemblerInfoForParsing(true); + + std::unique_ptr parser( + createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); + std::unique_ptr tap( + target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); + + if (!tap) { + emitError(loc, "assembler initialization error"); + return {}; + } + + parser->setTargetParser(*tap); + parser->Run(false); + + return result; +} + +std::optional> +SerializeToHSA::createHsaco(SmallVector &&ptx) { + SmallVector isaBinary = std::move(ptx); + auto loc = getOperation().getLoc(); + + // Save the ISA binary to a temp file. + int tempIsaBinaryFd = -1; + SmallString<128> tempIsaBinaryFilename; + if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, + tempIsaBinaryFilename)) { + emitError(loc, "temporary file for ISA binary creation error"); + return {}; + } + llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); + llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); + tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); + tempIsaBinaryOs.close(); + + // Create a temp file for HSA code object. 
+ int tempHsacoFD = -1; + SmallString<128> tempHsacoFilename; + if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, + tempHsacoFilename)) { + emitError(loc, "temporary file for HSA code object creation error"); + return {}; + } + llvm::FileRemover cleanupHsaco(tempHsacoFilename); + + llvm::SmallString<32> lldPath(toolkitPath); + llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); + int lldResult = llvm::sys::ExecuteAndWait( + lldPath, + {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); + if (lldResult != 0) { + emitError(loc, "lld invocation error"); + return {}; + } + + // Load the HSA code object. + auto hsacoFile = openInputFile(tempHsacoFilename); + if (!hsacoFile) { + emitError(loc, "read HSA code object from temp file error"); + return {}; + } + + StringRef buffer = hsacoFile->getBuffer(); + + return SmallVector(buffer.begin(), buffer.end()); +} + +std::optional> +SerializeToHSA::moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + std::optional serializedISA = + translateToISA(llvmModule, targetMachine); + if (!serializedISA) { + getOperation().emitError() << "Failed translating the module to ISA."; + return std::nullopt; + } + + LLVM_DEBUG({ + llvm::dbgs() << "ISA for module: " + << dyn_cast(&getOperation()).getNameAttr() + << "\n"; + llvm::dbgs() << *serializedISA << "\n"; + llvm::dbgs().flush(); + }); + + std::optional> assembledIsa = + assembleIsa(serializedISA.value()); + + if (!assembledIsa) { + getOperation().emitError() << "Failed during ISA assembling."; + return std::nullopt; + } + + return createHsaco(std::move(assembledIsa.value())); +} + +std::optional> +AMDGPUTargetAttr::serializeToObject(Operation *module, + const TargetOptions &options) const { + assert(module && "The module must be non null."); + if (!module) + return std::nullopt; + if (!mlir::isa(module)) { + module->emitError("Module must be a GPU module."); + return std::nullopt; + } + SerializeToHSA::init(); + 
SerializeToHSA serializer(*module, *this, options); + return serializer.run(); +} + +#else +// Provide a null vector for testing purposes. +std::optional> +AMDGPUTargetAttr::serializeToObject(Operation *module, + const TargetOptions &options) const { + assert(module && "The module must be non null."); + if (!module) + return std::nullopt; + if (!mlir::isa(module)) { + module->emitError("Module must be a GPU module."); + return std::nullopt; + } + return SmallVector{}; +} +#endif // MLIR_GPU_AMDGPU_TARGET_ENABLED + +LogicalResult +AMDGPUTargetAttr::verify(function_ref emitError, + int optLevel, StringRef triple, StringRef chip, + StringRef features, StringRef abiVersion, + DictionaryAttr flags, ArrayAttr files) { + if (optLevel < 0 || optLevel > 3) { + emitError() << "The optimization level must be a number between 0 and 3."; + return failure(); + } + if (triple.empty()) { + emitError() << "The target triple cannot be empty."; + return failure(); + } + if (chip.empty()) { + emitError() << "The target chip cannot be empty."; + return failure(); + } + if (abiVersion != "400" && abiVersion != "500") { + emitError() << "Invalid ABI version, it must be either `400` or `500`."; + return failure(); + } + if (files && llvm::all_of(files, [](::mlir::Attribute attr) { + return attr && mlir::isa(attr); + })) { + emitError() << "All the elements in the `link` array must be strings."; + return failure(); + } + return success(); +} diff --git a/mlir/lib/Dialect/GPU/Targets/NVPTXTarget.cpp b/mlir/lib/Dialect/GPU/Targets/NVPTXTarget.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/GPU/Targets/NVPTXTarget.cpp @@ -0,0 +1,254 @@ +//===- NVPTXTarget.cpp - MLIR GPU Dialect NVPTX target attribute ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files implements the NVPTX target attribute. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/GPU/Transforms/Passes.h" + +using namespace mlir; +using namespace mlir::gpu; + +#ifdef MLIR_GPU_NVPTX_TARGET_ENABLED +#include "mlir/ExecutionEngine/ModuleToObject.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" + +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TargetSelect.h" + +#ifndef __DEFAULT_CUDATOOLKIT_PATH__ +#define __DEFAULT_CUDATOOLKIT_PATH__ "" +#endif + +#define DEBUG_TYPE "serialize-to-object" + +#include + +static void emitCudaError(const llvm::Twine &expr, const char *buffer, + CUresult result, Location loc) { + const char *error; + cuGetErrorString(result, &error); + emitError(loc, expr.concat(" failed with error code ") + .concat(llvm::Twine{error}) + .concat("[") + .concat(buffer) + .concat("]")); +} + +#define RETURN_ON_CUDA_ERROR(expr) \ + do { \ + if (auto status = (expr)) { \ + emitCudaError(#expr, jitErrorBuffer, status, loc); \ + return {}; \ + } \ + } while (false) + +namespace { +struct InitTarget { + InitTarget() { + LLVMInitializeNVPTXTarget(); + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializeNVPTXAsmPrinter(); + } +}; + +class SerializeToCubin : public ModuleToObject { +public: + SerializeToCubin(Operation &module, NVPTXTargetAttr target, + TargetOptions targetOptions = {}); + + // Init the target. 
+ static void init(); + + std::optional>> + loadBitcodeFiles(llvm::LLVMContext &context, llvm::Module &module) override; + + std::optional> + moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) override; + +private: + StringRef toolkitPath; + SmallVector fileList; +}; +} // namespace + +SerializeToCubin::SerializeToCubin(Operation &module, NVPTXTargetAttr target, + TargetOptions targetOptions) + : ModuleToObject(module, target.getTriple(), target.getChip(), + target.getFeatures(), target.getO()), + toolkitPath(targetOptions.getToolkitPath()), + fileList(targetOptions.getBitcodeFiles()) { + if (toolkitPath.empty()) + toolkitPath = __DEFAULT_CUDATOOLKIT_PATH__; + + if (ArrayAttr files = target.getLink()) + for (Attribute attr : files.getValue()) + if (auto file = dyn_cast(attr)) + fileList.push_back(file.str()); +} + +void SerializeToCubin::init() { static InitTarget target = InitTarget(); } + +std::optional>> +SerializeToCubin::loadBitcodeFiles(llvm::LLVMContext &context, + llvm::Module &module) { + // Try loading `libdevice` from a CUDA toolkit installation. + StringRef pathRef = toolkitPath; + if (pathRef.size()) { + SmallVector path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitError() << "CUDA path: " << pathRef + << " does not exist or is not a directory.\n"; + return std::nullopt; + } + // TODO remove this hard coded path. 
+ llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc"); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_regular_file(pathRef)) { + getOperation().emitError() << "LibDevice path: " << pathRef + << " does not exist or is not a file.\n"; + return std::nullopt; + } + fileList.push_back(pathRef.str()); + } + + SmallVector> bcFiles; + if (failed(loadBitcodeFilesFromList(context, fileList, bcFiles, true))) + return std::nullopt; + return bcFiles; +} + +std::optional> +SerializeToCubin::moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + std::optional serializedISA = + translateToISA(llvmModule, targetMachine); + if (!serializedISA) { + getOperation().emitError() << "Failed translating the module to ISA."; + return std::nullopt; + } + + LLVM_DEBUG({ + llvm::dbgs() << "ISA for module: " + << dyn_cast(&getOperation()).getNameAttr() + << "\n"; + llvm::dbgs() << *serializedISA << "\n"; + llvm::dbgs().flush(); + }); + + auto loc = getOperation().getLoc(); + char jitErrorBuffer[4096] = {0}; + + RETURN_ON_CUDA_ERROR(cuInit(0)); + + // Linking requires a device context. 
+ CUdevice device; + RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); + CUcontext context; + RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device)); + CUlinkState linkState; + + CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, + CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; + void *jitOptionsVals[] = {jitErrorBuffer, + reinterpret_cast(sizeof(jitErrorBuffer))}; + + RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ + jitOptions, /* jit options */ + jitOptionsVals, /* jit option values */ + &linkState)); + + auto kernelName = dyn_cast(getOperation()).getName().str(); + RETURN_ON_CUDA_ERROR(cuLinkAddData( + linkState, CUjitInputType::CU_JIT_INPUT_PTX, + const_cast(static_cast(serializedISA->c_str())), + serializedISA->length(), kernelName.c_str(), + 0, /* number of jit options */ + nullptr, /* jit options */ + nullptr /* jit option values */ + )); + + void *cubinData; + size_t cubinSize; + RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); + + char *cubinAsChar = static_cast(cubinData); + auto result = SmallVector(cubinAsChar, cubinAsChar + cubinSize); + + // This will also destroy the cubin data. + RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); + RETURN_ON_CUDA_ERROR(cuCtxDestroy(context)); + return result; +} + +std::optional> +NVPTXTargetAttr::serializeToObject(Operation *module, + const TargetOptions &options) const { + assert(module && "The module must be non null."); + if (!module) + return std::nullopt; + if (!mlir::isa(module)) { + module->emitError("Module must be a GPU module."); + return std::nullopt; + } + SerializeToCubin::init(); + SerializeToCubin serializer(*module, *this, options); + return serializer.run(); +} + +#else +// Provide a null vector for testing purposes. 
+std::optional<SmallVector<char, 0>>
+NVPTXTargetAttr::serializeToObject(Operation *module,
+                                   const TargetOptions &options) const {
+  assert(module && "The module must be non null.");
+  if (!module)
+    return std::nullopt;
+  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
+    module->emitError("Module must be a GPU module.");
+    return std::nullopt;
+  }
+  return SmallVector<char, 0>{};
+}
+#endif // MLIR_GPU_NVPTX_TARGET_ENABLED
+
+// Verifies the NVPTX target attribute parameters: opt level in [0, 3],
+// non-empty triple & chip, and that every entry in the optional `link`
+// array is a string attribute (a bitcode path).
+LogicalResult
+NVPTXTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                        int optLevel, StringRef triple, StringRef chip,
+                        StringRef features, DictionaryAttr flags,
+                        ArrayAttr files) {
+  if (optLevel < 0 || optLevel > 3) {
+    emitError() << "The optimization level must be a number between 0 and 3.";
+    return failure();
+  }
+  if (triple.empty()) {
+    emitError() << "The target triple cannot be empty.";
+    return failure();
+  }
+  if (chip.empty()) {
+    emitError() << "The target chip cannot be empty.";
+    return failure();
+  }
+  // Note the negation: diagnose only when SOME element is NOT a string.
+  if (files && !llvm::all_of(files, [](::mlir::Attribute attr) {
+        return attr && mlir::isa<StringAttr>(attr);
+      })) {
+    emitError() << "All the elements in the `link` array must be strings.";
+    return failure();
+  }
+  return success();
+}
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -19,6 +19,7 @@
 # libMLIR.so.
 add_mlir_library(MLIRExecutionEngineUtils
   OptUtils.cpp
+  ModuleToObject.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/ExecutionEngine
diff --git a/mlir/lib/ExecutionEngine/ModuleToObject.cpp b/mlir/lib/ExecutionEngine/ModuleToObject.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/ExecutionEngine/ModuleToObject.cpp
@@ -0,0 +1,221 @@
+//===- ModuleToObject.cpp - Module to object base class ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the base class for transforming Operations into binary +// objects. +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/ModuleToObject.h" + +#include "mlir/ExecutionEngine/OptUtils.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" +#include "mlir/Target/LLVMIR/ModuleTranslation.h" + +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/TargetParser.h" +#include "llvm/Transforms/IPO/Internalize.h" + +using namespace mlir; + +ModuleToObject::ModuleToObject(Operation &module, StringRef triple, + StringRef chip, StringRef features, int optLevel) + : module(module), triple(triple), chip(chip), features(features), + optLevel(optLevel) {} + +Operation &ModuleToObject::getOperation() { return module; } + +std::unique_ptr ModuleToObject::createTargetMachine() { + std::string error; + // Load the target. + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple, error); + if (!target) { + getOperation().emitError() << "Failed to lookup target: " << error; + return {}; + } + + // Create the target machine using the target. 
+ llvm::TargetMachine *machine = + target->createTargetMachine(triple, chip, features, {}, {}); + if (!machine) { + getOperation().emitError() << "Failed to create the target machine."; + return {}; + } + return std::unique_ptr{machine}; +} + +std::unique_ptr +ModuleToObject::loadBitcodeFile(llvm::LLVMContext &context, StringRef path) { + llvm::SMDiagnostic error; + std::unique_ptr library = + llvm::getLazyIRFileModule(path, error, context); + if (!library) { + getOperation().emitError() << "Failed loading file from " << path + << ", error: " << error.getMessage(); + return nullptr; + } + return library; +} + +LogicalResult ModuleToObject::loadBitcodeFilesFromList( + llvm::LLVMContext &context, ArrayRef fileList, + SmallVector> &llvmModules, + bool failureOnError) { + for (const std::string &str : fileList) { + // Test if the path exists, if it doesn't abort. + StringRef pathRef = StringRef(str.data(), str.size()); + if (!llvm::sys::fs::is_regular_file(pathRef)) { + getOperation().emitError() + << "File path: " << pathRef << " does not exist or is not a file.\n"; + return failure(); + } + // Load the file or abort on error. + if (auto bcFile = loadBitcodeFile(context, pathRef)) + llvmModules.push_back(std::move(bcFile)); + else if (failureOnError) + return failure(); + } + return success(); +} + +std::unique_ptr +ModuleToObject::translateToLLVMIR(llvm::LLVMContext &llvmContext) { + return translateModuleToLLVMIR(&getOperation(), llvmContext); +} + +LogicalResult +ModuleToObject::linkFiles(llvm::Module &module, + SmallVector> &&libs) { + if (libs.empty()) + return success(); + llvm::Linker linker(module); + for (std::unique_ptr &libModule : libs) { + // This bitcode linking imports the library functions into the module, + // allowing LLVM optimization passes (which must run after linking) to + // optimize across the libraries and the module's code. 
We also only import + // symbols if they are referenced by the module or a previous library since + // there will be no other source of references to those symbols in this + // compilation and since we don't want to bloat the resulting code object. + bool err = linker.linkInModule( + std::move(libModule), llvm::Linker::Flags::LinkOnlyNeeded, + [](llvm::Module &m, const StringSet<> &gvs) { + llvm::internalizeModule(m, [&gvs](const llvm::GlobalValue &gv) { + return !gv.hasName() || (gvs.count(gv.getName()) == 0); + }); + }); + // True is linker failure + if (err) { + getOperation().emitError("Unrecoverable failure during bitcode linking."); + // We have no guaranties about the state of `ret`, so bail + return failure(); + } + } + return success(); +} + +LogicalResult ModuleToObject::optimizeModule(llvm::Module &module, + llvm::TargetMachine &targetMachine, + int optLevel) { + if (optLevel < 0 || optLevel > 3) + return getOperation().emitError() + << "Invalid optimization level: " << optLevel << "."; + + targetMachine.setOptLevel(static_cast(optLevel)); + + auto transformer = + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); + auto error = transformer(&module); + if (error) { + InFlightDiagnostic mlirError = getOperation().emitError(); + llvm::handleAllErrors( + std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { + mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n"; + }); + return mlirError; + } + return success(); +} + +std::optional +ModuleToObject::translateToISA(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + std::string targetISA; + llvm::raw_string_ostream stream(targetISA); + + { // Drop pstream after this to prevent the ISA from being stuck buffering + llvm::buffer_ostream pstream(stream); + llvm::legacy::PassManager codegenPasses; + + if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, + llvm::CGFT_AssemblyFile)) + return std::nullopt; + + codegenPasses.run(llvmModule); 
+ } + return stream.str(); +} + +std::optional> +ModuleToObject::moduleToObject(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + SmallVector binaryData; + // Write the LLVM module bitcode to a buffer. + llvm::raw_svector_ostream outputStream(binaryData); + llvm::WriteBitcodeToFile(llvmModule, outputStream); + return binaryData; +} + +std::optional> ModuleToObject::run() { + // Translate the module to LLVM IR. + llvm::LLVMContext llvmContext; + std::unique_ptr llvmModule = translateToLLVMIR(llvmContext); + if (!llvmModule) { + getOperation().emitError() << "Failed creating the llvm::Module."; + return std::nullopt; + } + + // Create the target machine. + std::unique_ptr targetMachine = createTargetMachine(); + if (!targetMachine) + return std::nullopt; + + // Set the data layout and target triple of the module. + llvmModule->setDataLayout(targetMachine->createDataLayout()); + llvmModule->setTargetTriple(targetMachine->getTargetTriple().getTriple()); + + // Link bitcode files. + handleModulePreLink(*llvmModule, *targetMachine); + { + auto libs = loadBitcodeFiles(llvmContext, *llvmModule); + if (!libs) + return std::nullopt; + if (libs->size()) + if (failed(linkFiles(*llvmModule, std::move(*libs)))) + return std::nullopt; + handleModulePostLink(*llvmModule, *targetMachine); + } + + // Optimize the module. + if (failed(optimizeModule(*llvmModule, *targetMachine, optLevel))) + return std::nullopt; + + // Return the serialized object. + return moduleToObject(*llvmModule, *targetMachine); +} diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -694,7 +694,8 @@ } /// Create named global variables that correspond to llvm.mlir.global -/// definitions. Convert llvm.global_ctors and global_dtors ops. +/// definitions. Convert llvm.global_ctors and global_dtors ops. 
Finally convert +/// operations with the `GlobalSymbol` trait. LogicalResult ModuleTranslation::convertGlobals() { for (auto op : getModuleBody(mlirModule).getOps()) { llvm::Type *type = convertType(op.getType()); @@ -797,6 +798,18 @@ if (failed(convertDialectAttributes(op))) return failure(); + // Convert operations having the `GlobalSymbol` trait. + { + llvm::IRBuilder<> llvmBuilder(llvmModule->getContext()); + for (Operation &op : getModuleBody(mlirModule).getOperations()) { + if (!isa(&op) && + op.hasTrait() && + failed(convertOperation(op, llvmBuilder))) { + return failure(); + } + } + } + return success(); } @@ -1427,6 +1440,7 @@ if (!isa(&o) && !o.hasTrait() && + !o.hasTrait() && failed(translator.convertOperation(o, llvmBuilder))) { return nullptr; } diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -77,7 +77,7 @@ module attributes {gpu.container_module} { func.func @launch_func_undefined_module(%sz : index) { - // expected-error@+1 {{kernel module 'kernels' is undefined}} + // expected-error@+1 {{kernel container 'kernels' is undefined}} gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) return } @@ -610,3 +610,19 @@ } } } + +// ----- + +module { + // expected-error @+1 {{'gpu.module' op attribute 'targets' failed to satisfy constraint: Array of GPU target attributes with at least 1 elements}} + gpu.module @gpu_funcs [] { + } +} + +// ----- + +module { + // expected-error @+1 {{'gpu.module' op attribute 'targets' failed to satisfy constraint: Array of GPU target attributes with at least 1 elements}} + gpu.module @gpu_funcs [1] { + } +}