Diff 329590

mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h

	//===- GPUCommonPass.h - MLIR GPU runtime support -------------------------===//			//===- GPUCommonPass.h - MLIR GPU runtime support -------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	#ifndef MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_			#ifndef MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_
	#define MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_			#define MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_

	#include "mlir/Support/LLVM.h"			#include "mlir/Support/LLVM.h"
	#include "llvm/IR/Module.h"			#include "llvm/ADT/StringRef.h"
	#include <vector>			#include <vector>

				namespace llvm {
				class LLVMContext;
				class Module;
				} // namespace llvm

	namespace mlir {			namespace mlir {

	class LLVMTypeConverter;			class LLVMTypeConverter;
	class Location;			class Location;
	struct LogicalResult;			struct LogicalResult;
	class ModuleOp;			class ModuleOp;
	class Operation;			class Operation;
	class OwningRewritePatternList;			class OwningRewritePatternList;

	template <typename T>			template <typename T>
	class OperationPass;			class OperationPass;

	namespace gpu {			namespace gpu {
	class GPUModuleOp;			class GPUModuleOp;

	/// Returns the default annotation name for GPU binary blobs.
	std::string getDefaultGpuBinaryAnnotation();
	} // namespace gpu			} // namespace gpu

	namespace LLVM {			namespace LLVM {
	class LLVMDialect;			class LLVMDialect;
	} // namespace LLVM			} // namespace LLVM

	using OwnedBlob = std::unique_ptr<std::vector<char>>;			using OwnedBlob = std::unique_ptr<std::vector<char>>;
	using BlobGenerator =			using BlobGenerator =
	▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/GPU/Passes.h

	//===- Passes.h - Pass Entrypoints ------------------------------- C++ --===//			//===- Passes.h - Pass Entrypoints ------------------------------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This header file defines prototypes that expose pass constructors.			// This header file defines prototypes that expose pass constructors.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef MLIR_DIALECT_GPU_PASSES_H_			#ifndef MLIR_DIALECT_GPU_PASSES_H_
	#define MLIR_DIALECT_GPU_PASSES_H_			#define MLIR_DIALECT_GPU_PASSES_H_

				#include "mlir/Dialect/GPU/GPUDialect.h"
	#include "mlir/Pass/Pass.h"			#include "mlir/Pass/Pass.h"

				namespace llvm {
				class TargetMachine;
				class LLVMContext;
				class Module;
				} // namespace llvm

	namespace mlir {			namespace mlir {
	/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into			/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
	/// a separate kernel function.			/// a separate kernel function.
	std::unique_ptr<OperationPass<ModuleOp>> createGpuKernelOutliningPass();			std::unique_ptr<OperationPass<ModuleOp>> createGpuKernelOutliningPass();

	/// Rewrites a function region so that GPU ops execute asynchronously.			/// Rewrites a function region so that GPU ops execute asynchronously.
	std::unique_ptr<OperationPass<FuncOp>> createGpuAsyncRegionPass();			std::unique_ptr<OperationPass<FuncOp>> createGpuAsyncRegionPass();

	/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.			/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
	void populateGpuAllReducePatterns(MLIRContext *context,			void populateGpuAllReducePatterns(MLIRContext *context,
	OwningRewritePatternList &patterns);			OwningRewritePatternList &patterns);

	/// Collect all patterns to rewrite ops within the GPU dialect.			/// Collect all patterns to rewrite ops within the GPU dialect.
	inline void populateGpuRewritePatterns(MLIRContext *context,			inline void populateGpuRewritePatterns(MLIRContext *context,
	OwningRewritePatternList &patterns) {			OwningRewritePatternList &patterns) {
	populateGpuAllReducePatterns(context, patterns);			populateGpuAllReducePatterns(context, patterns);
	}			}

				namespace gpu {
				/// Returns the default annotation name for GPU binary blobs.
				std::string getDefaultGpuBinaryAnnotation();

				/// Base pass class to serialize kernel functions through LLVM into
				/// user-specified IR and add the resulting blob as module attribute.
				///
				/// Derived passes supply the two pure-virtual hooks `translateToLLVMIR` and
				/// `serializeISA`; `runOnOperation` is declared `final` in this class.
				class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
				herhutUnsubmitted Not Done Reply Inline Actions This is not public API but rather an implementation detail. Could it move to an implementation header in `lib`? herhut: This is not public API but rather an implementation detail. Could it move to an implementation…
				csiggAuthorUnsubmitted Done Reply Inline Actions At the moment it is public API because it's used outside. Once we cleaned up the use in Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp (which I will do in a separate CL), we can move it. csigg: At the moment it is public API because it's used outside. Once we cleaned up the use in…
				public:
				/// Constructs the pass; `passID` is forwarded to the OperationPass base.
				SerializeToBlobPass(TypeID passID);
				/// Copy constructor for the pass and its option members.
				SerializeToBlobPass(const SerializeToBlobPass &other);

				/// Entry point of the pass. Declared `final`, so derived passes can only
				/// customize behavior through the virtual hooks below.
				void runOnOperation() final;

				private:
				// Creates the LLVM target machine to generate the ISA.
				// Uses the `triple`, `chip` and `features` options declared below.
				std::unique_ptr<llvm::TargetMachine> createTargetMachine();

				// Translates the 'getOperation()' result to an LLVM module.
				// Must be implemented by the derived pass; the returned module is
				// expected to live in `llvmContext`.
				virtual std::unique_ptr<llvm::Module>
				translateToLLVMIR(llvm::LLVMContext &llvmContext) = 0;

				// Serializes the target ISA to binary form.
				// Must be implemented by the derived pass; `isa` is the textual ISA
				// and the result is the binary blob as a vector of bytes.
				virtual std::unique_ptr<std::vector<char>>
				serializeISA(const std::string &isa) = 0;

				protected:
				// Pass options, accessible to derived passes.
				// "triple": target triple.
				Option<std::string> triple{*this, "triple",
				::llvm::cl::desc("Target triple")};
				// "chip": target architecture.
				Option<std::string> chip{*this, "chip",
				::llvm::cl::desc("Target architecture")};
				// "features": target features.
				Option<std::string> features{*this, "features",
				::llvm::cl::desc("Target features")};
				// "gpu-binary-annotation": name of the attribute that holds the GPU
				// binary; defaults to getDefaultGpuBinaryAnnotation().
				Option<std::string> gpuBinaryAnnotation{
				*this, "gpu-binary-annotation",
				llvm::cl::desc("Annotation attribute string for GPU binary"),
				llvm::cl::init(getDefaultGpuBinaryAnnotation())};
				};
				} // namespace gpu

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// Registration			// Registration
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	/// Generate the code for registering passes.			/// Generate the code for registering passes.
	#define GEN_PASS_REGISTRATION			#define GEN_PASS_REGISTRATION
	#include "mlir/Dialect/GPU/Passes.h.inc"			#include "mlir/Dialect/GPU/Passes.h.inc"

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_DIALECT_GPU_PASSES_H_			#endif // MLIR_DIALECT_GPU_PASSES_H_

mlir/lib/Conversion/GPUCommon/CMakeLists.txt

Show All 18 Lines	add_mlir_conversion_library(MLIRGPUToGPURuntimeTransforms
ConvertKernelFuncToBlob.cpp		ConvertKernelFuncToBlob.cpp
GPUOpsLowering.cpp		GPUOpsLowering.cpp

DEPENDS		DEPENDS
MLIRConversionPassIncGen		MLIRConversionPassIncGen
intrinsics_gen		intrinsics_gen

LINK_COMPONENTS		LINK_COMPONENTS
Core
MC
${AMDGPU_LIBS}		${AMDGPU_LIBS}
${NVPTX_LIBS}		${NVPTX_LIBS}

LINK_LIBS PUBLIC		LINK_LIBS PUBLIC
MLIRAsyncToLLVM		MLIRAsyncToLLVM
MLIRGPU		MLIRGPU
MLIRIR		MLIRIR
MLIRLLVMIR		MLIRLLVMIR
MLIRPass		MLIRPass
MLIRSupport		MLIRSupport
MLIRStandardToLLVM		MLIRStandardToLLVM
)		)

mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp

	Show All 9 Lines
	// corresponding binary blob that can be executed on a GPU. Currently			// corresponding binary blob that can be executed on a GPU. Currently
	// only translates the function itself but no dependencies.			// only translates the function itself but no dependencies.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"			#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"

	#include "mlir/Dialect/GPU/GPUDialect.h"			#include "mlir/Dialect/GPU/GPUDialect.h"
				#include "mlir/Dialect/GPU/Passes.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"			#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
	#include "mlir/IR/Attributes.h"			#include "mlir/IR/Attributes.h"
	#include "mlir/IR/Builders.h"			#include "mlir/IR/Builders.h"
	#include "mlir/IR/BuiltinOps.h"			#include "mlir/IR/BuiltinOps.h"
	#include "mlir/Pass/Pass.h"			#include "mlir/Pass/Pass.h"
	#include "mlir/Pass/PassRegistry.h"			#include "mlir/Pass/PassRegistry.h"
	#include "mlir/Support/LogicalResult.h"			#include "mlir/Support/LogicalResult.h"

	#include "llvm/ADT/Optional.h"			#include "llvm/ADT/Optional.h"
	#include "llvm/ADT/Twine.h"			#include "llvm/ADT/Twine.h"
	#include "llvm/IR/Constants.h"
	#include "llvm/IR/LegacyPassManager.h"
	#include "llvm/IR/Module.h"
	#include "llvm/Support/Error.h"			#include "llvm/Support/Error.h"
	#include "llvm/Support/Mutex.h"			#include "llvm/Support/Mutex.h"
	#include "llvm/Support/TargetRegistry.h"			#include "llvm/Support/TargetRegistry.h"
	#include "llvm/Support/TargetSelect.h"			#include "llvm/Support/TargetSelect.h"
	#include "llvm/Target/TargetMachine.h"

	using namespace mlir;			using namespace mlir;

	namespace {			namespace {

	/// A pass converting tagged kernel modules to a blob with target instructions.			/// A pass converting tagged kernel modules to a blob with target instructions.
	///			///
	/// If tagged as a kernel module, each contained function is translated to			/// If tagged as a kernel module, each contained function is translated to
	/// user-specified IR. A user provided BlobGenerator then compiles the IR to			/// user-specified IR. A user provided BlobGenerator then compiles the IR to
	/// GPU binary code, which is then attached as an attribute to the function.			/// GPU binary code, which is then attached as an attribute to the function.
	/// The function body is erased.			/// The function body is erased.
	class GpuKernelToBlobPass			class GpuKernelToBlobPass
	: public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {			: public PassWrapper<GpuKernelToBlobPass, gpu::SerializeToBlobPass> {
	public:			public:
	GpuKernelToBlobPass(LoweringCallback loweringCallback,			GpuKernelToBlobPass(LoweringCallback loweringCallback,
	BlobGenerator blobGenerator, StringRef triple,			BlobGenerator blobGenerator, StringRef triple,
	StringRef targetChip, StringRef features,			StringRef targetChip, StringRef features,
	StringRef gpuBinaryAnnotation)			StringRef gpuBinaryAnnotation)
	: loweringCallback(loweringCallback), blobGenerator(blobGenerator),			: loweringCallback(loweringCallback), blobGenerator(blobGenerator) {
	triple(triple), targetChip(targetChip), features(features) {			if (!triple.empty())
				this->triple = triple.str();
				if (!targetChip.empty())
				this->chip = targetChip.str();
				if (!features.empty())
				this->features = features.str();
	if (!gpuBinaryAnnotation.empty())			if (!gpuBinaryAnnotation.empty())
	this->gpuBinaryAnnotation = gpuBinaryAnnotation.str();			this->gpuBinaryAnnotation = gpuBinaryAnnotation.str();
	}			}

	GpuKernelToBlobPass(const GpuKernelToBlobPass &other)
	: loweringCallback(other.loweringCallback),
	blobGenerator(other.blobGenerator), triple(other.triple),
	targetChip(other.targetChip), features(other.features) {}

	void runOnOperation() override {
	gpu::GPUModuleOp module = getOperation();

	// Lower the module to an LLVM IR module using a separate context to enable
	// multi-threaded processing.
	llvm::LLVMContext llvmContext;
	std::unique_ptr<llvm::Module> llvmModule =
	loweringCallback(module, llvmContext, "LLVMDialectModule");
	if (!llvmModule)
	return signalPassFailure();

	// Translate the llvm module to a target blob and attach the result as
	// attribute to the module.
	if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
	*llvmModule, module.getLoc(), module.getName()))
	module->setAttr(gpuBinaryAnnotation, blobAttr);
	else
	signalPassFailure();
	}

	private:			private:
	std::string translateModuleToISA(llvm::Module &module,			// Translates the 'getOperation()' result to an LLVM module.
	llvm::TargetMachine &targetMachine);			std::unique_ptr<llvm::Module>
				translateToLLVMIR(llvm::LLVMContext &llvmContext) override {
				return loweringCallback(getOperation(), llvmContext, "LLVMDialectModule");
				}

	/// Converts llvmModule to a blob with target instructions using the			// Serializes the target ISA to binary form.
	/// user-provided generator. Location is used for error reporting and name is			std::unique_ptr<std::vector<char>>
	/// forwarded to the blob generator to use in its logging mechanisms.			serializeISA(const std::string &isa) override {
				rriddle (inline comment, not done): This is copying the string, why not StringRef?
				csigg (author, inline reply, done): Good point. I changed it to a string const-reference because it is used as a null-terminated char pointer.
	OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,			return blobGenerator(isa, getOperation().getLoc(),
	StringRef name);			getOperation().getName());
				}
	/// Translates llvmModule to a blob with target instructions and returns the
	/// result as attribute.
	StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
	Location loc, StringRef name);

	LoweringCallback loweringCallback;			LoweringCallback loweringCallback;
	BlobGenerator blobGenerator;			BlobGenerator blobGenerator;

	llvm::Triple triple;
	std::string targetChip;
	std::string features;

	Option<std::string> gpuBinaryAnnotation{
	*this, "gpu-binary-annotation",
	llvm::cl::desc("Annotation attribute string for GPU binary"),
	llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
	};			};

	} // anonymous namespace			} // anonymous namespace

	std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }

	std::string
	GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
	llvm::TargetMachine &targetMachine) {
	std::string targetISA;
	{
	llvm::raw_string_ostream stream(targetISA);
	llvm::buffer_ostream pstream(stream);
	llvm::legacy::PassManager codegenPasses;
	targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
	llvm::CGFT_AssemblyFile);
	codegenPasses.run(module);
	}

	return targetISA;
	}

	OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
	Location loc,
	StringRef name) {
	std::unique_ptr<llvm::TargetMachine> targetMachine;
	{
	std::string error;
	const llvm::Target *target =
	llvm::TargetRegistry::lookupTarget("", triple, error);
	if (target == nullptr) {
	emitError(loc, "cannot initialize target triple");
	return {};
	}
	targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
	features, {}, {}));
	if (targetMachine == nullptr) {
	emitError(loc, "cannot initialize target machine");
	return {};
	}
	}

	llvmModule.setDataLayout(targetMachine->createDataLayout());

	auto targetISA = translateModuleToISA(llvmModule, *targetMachine);

	return blobGenerator(targetISA, loc, name);
	}

	StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
	llvm::Module &llvmModule, Location loc, StringRef name) {
	auto blob = convertModuleToBlob(llvmModule, loc, name);
	if (!blob)
	return {};
	return StringAttr::get(loc->getContext(), {blob->data(), blob->size()});
	}

	std::unique_ptr<OperationPass<gpu::GPUModuleOp>>			std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
	mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,			mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
	BlobGenerator blobGenerator,			BlobGenerator blobGenerator,
	StringRef triple, StringRef targetChip,			StringRef triple, StringRef targetChip,
	StringRef features,			StringRef features,
	StringRef gpuBinaryAnnotation) {			StringRef gpuBinaryAnnotation) {
	return std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,			return std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
	triple, targetChip, features,			triple, targetChip, features,
	gpuBinaryAnnotation);			gpuBinaryAnnotation);
	}			}

mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp

	Show All 14 Lines

	#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"			#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"

	#include "../PassDetail.h"			#include "../PassDetail.h"
	#include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"			#include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
	#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"			#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
	#include "mlir/Dialect/Async/IR/Async.h"			#include "mlir/Dialect/Async/IR/Async.h"
	#include "mlir/Dialect/GPU/GPUDialect.h"			#include "mlir/Dialect/GPU/GPUDialect.h"
				#include "mlir/Dialect/GPU/Passes.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"			#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
	#include "mlir/IR/Attributes.h"			#include "mlir/IR/Attributes.h"
	#include "mlir/IR/Builders.h"			#include "mlir/IR/Builders.h"
	#include "mlir/IR/BuiltinOps.h"			#include "mlir/IR/BuiltinOps.h"
	#include "mlir/IR/BuiltinTypes.h"			#include "mlir/IR/BuiltinTypes.h"

	#include "llvm/ADT/STLExtras.h"			#include "llvm/ADT/STLExtras.h"
	#include "llvm/IR/DataLayout.h"
	#include "llvm/IR/DerivedTypes.h"
	#include "llvm/IR/Module.h"
	#include "llvm/IR/Type.h"
	#include "llvm/Support/Error.h"			#include "llvm/Support/Error.h"
	#include "llvm/Support/FormatVariadic.h"			#include "llvm/Support/FormatVariadic.h"

	using namespace mlir;			using namespace mlir;

	static constexpr const char *kGpuBinaryStorageSuffix = "_gpubin_cst";			static constexpr const char *kGpuBinaryStorageSuffix = "_gpubin_cst";

	namespace {			namespace {
	▲ Show 20 Lines • Show All 768 Lines • Show Last 20 Lines

mlir/lib/Dialect/GPU/CMakeLists.txt

	add_mlir_dialect_library(MLIRGPU			add_mlir_dialect_library(MLIRGPU
	IR/GPUDialect.cpp			IR/GPUDialect.cpp
	Transforms/AllReduceLowering.cpp			Transforms/AllReduceLowering.cpp
	Transforms/AsyncRegionRewriter.cpp			Transforms/AsyncRegionRewriter.cpp
	Transforms/KernelOutlining.cpp			Transforms/KernelOutlining.cpp
	Transforms/MemoryPromotion.cpp			Transforms/MemoryPromotion.cpp
	Transforms/ParallelLoopMapper.cpp			Transforms/ParallelLoopMapper.cpp
				Transforms/SerializeToBlob.cpp

	ADDITIONAL_HEADER_DIRS			ADDITIONAL_HEADER_DIRS
	${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU			${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU

				LINK_COMPONENTS
				Core
				MC

	DEPENDS			DEPENDS
	MLIRGPUOpsIncGen			MLIRGPUOpsIncGen
	MLIRGPUOpInterfacesIncGen			MLIRGPUOpInterfacesIncGen
	MLIRGPUPassIncGen			MLIRGPUPassIncGen
	MLIRParallelLoopMapperAttrGen			MLIRParallelLoopMapperAttrGen
	MLIRParallelLoopMapperEnumsGen			MLIRParallelLoopMapperEnumsGen

	LINK_LIBS PUBLIC			LINK_LIBS PUBLIC
	Show All 11 Lines

mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp

This file was added.

				//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// This file implements a base class for a pass to serialize a gpu module
				// into a binary blob that can be executed on a GPU. The binary blob is added
				// as a string attribute to the gpu module.
				//
				herhut (inline comment, done): Not sure this comment is correct. It translates the entire module, no?
				//===----------------------------------------------------------------------===//

				#include "mlir/Dialect/GPU/Passes.h"
				#include "mlir/Pass/Pass.h"
				#include "llvm/IR/LegacyPassManager.h"
				#include "llvm/Support/TargetRegistry.h"
				#include "llvm/Support/TargetSelect.h"
				#include "llvm/Target/TargetMachine.h"

				using namespace mlir;

				// Returns the default annotation name for GPU binary blobs ("gpu.binary").
				std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }

				// Constructs the base pass, forwarding `passID` to the
				// OperationPass<gpu::GPUModuleOp> base class.
				gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
				: OperationPass<gpu::GPUModuleOp>(passID) {}

				// Copy constructor: copy-constructs the OperationPass base from `other`
				// and then assigns each option value individually.
				gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
				: OperationPass<gpu::GPUModuleOp>(other) {
				// Pass::Option has no copy constructor, copy them manually.
				// NOTE(review): the inline discussion below states that options are
				// copied implicitly as part of `clone`, so these assignments may be
				// redundant; confirm before removing them.
				triple = other.triple;
				chip = other.chip;
				herhutUnsubmitted Not Done Reply Inline Actions I have never seen a pass copy options before. How is this normally handled? herhut: I have never seen a pass copy options before. How is this normally handled?
				csiggAuthorUnsubmitted Done Reply Inline Actions Pass::Option is currently missing a copy c'tor. I didn't want to delay this change (or make unrelated changes) and will clean it up separately. csigg: Pass::Option is currently missing a copy c'tor. I didn't want to delay this change (or make…
				rriddleUnsubmitted Not Done Reply Inline Actions Pass options are copied implicitly as part of `clone`, i.e. you shouldn't need to do anything. rriddle: Pass options are copied implicitly as part of `clone`, i.e. you shouldn't need to do anything.
				rriddleUnsubmitted Not Done Reply Inline Actions (Did you miss this comment?) rriddle: (Did you miss this comment?)
				csiggAuthorUnsubmitted Done Reply Inline Actions I did miss your comment, sorry. I will send a follow-up change. csigg: I did miss your comment, sorry. I will send a follow-up change.
				features = other.features;
				gpuBinaryAnnotation = other.gpuBinaryAnnotation;
				}

				/// Lowers `llvmModule` to textual target ISA (assembly) using
				/// `targetMachine` and returns the assembly as a string.
				///
				/// The module's data layout is overwritten with the one created by
				/// `targetMachine` before code generation runs.
				static std::string translateToISA(llvm::Module &llvmModule,
				                                  llvm::TargetMachine &targetMachine) {
				  llvmModule.setDataLayout(targetMachine.createDataLayout());

				  std::string targetISA;
				  {
				    // Keep the streams in their own scope: both are destroyed (and any
				    // output they still hold is written through to `targetISA`) before
				    // the string is returned. Returning while they are still alive
				    // would only yield the full text if the compiler elides the copy
				    // of `targetISA`.
				    llvm::raw_string_ostream stream(targetISA);
				    llvm::buffer_ostream pstream(stream);
				    llvm::legacy::PassManager codegenPasses;
				    // NOTE(review): the result of addPassesToEmitFile is ignored, as in
				    // the original code; confirm whether a failure needs to be reported.
				    targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
				                                      llvm::CGFT_AssemblyFile);
				    codegenPasses.run(llvmModule);
				  }
				  return targetISA;
				}

				void gpu::SerializeToBlobPass::runOnOperation() {
				// Lower the module to an LLVM IR module using a separate context to enable
				// multi-threaded processing.
				llvm::LLVMContext llvmContext;
				std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
				if (!llvmModule)
				return signalPassFailure();

				// Lower the LLVM IR module to target ISA.
				std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
				if (!targetMachine)
				return signalPassFailure();

				std::string targetISA = translateToISA(llvmModule, targetMachine);

				// Serialize the target ISA.
				std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
				if (!blob)
				return signalPassFailure();

				// Add the blob as module attribute.
				auto attr = StringAttr::get(&getContext(), {blob->data(), blob->size()});
				getOperation()->setAttr(gpuBinaryAnnotation, attr);
				}

				std::unique_ptr<llvm::TargetMachine>
				gpu::SerializeToBlobPass::createTargetMachine() {
				Location loc = getOperation().getLoc();
				std::string error;
				const llvm::Target *target =
				llvm::TargetRegistry::lookupTarget(triple, error);
				if (!target) {
				emitError(loc, Twine("failed to lookup target: ") + error);
				return {};
				}
				llvm::TargetMachine *machine =
				target->createTargetMachine(triple, chip, features, {}, {});
				if (!machine) {
				emitError(loc, "failed to create target machine");
				return {};
				}

				return std::unique_ptr<llvm::TargetMachine>{machine};
				}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Add base class for GpuKernelToBlobPass
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 329590

mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h

mlir/include/mlir/Dialect/GPU/Passes.h

mlir/lib/Conversion/GPUCommon/CMakeLists.txt

mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp

mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp

mlir/lib/Dialect/GPU/CMakeLists.txt

mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir] Add base class for GpuKernelToBlobPass
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 329590

mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h

mlir/include/mlir/Dialect/GPU/Passes.h

mlir/lib/Conversion/GPUCommon/CMakeLists.txt

mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp

mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp

mlir/lib/Dialect/GPU/CMakeLists.txt

mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp

[mlir] Add base class for GpuKernelToBlobPass
ClosedPublic