Diff 265806

mlir/CMakeLists.txt

	Show All 25 Lines
	if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)			if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
	set(MLIR_CUDA_CONVERSIONS_ENABLED 1)			set(MLIR_CUDA_CONVERSIONS_ENABLED 1)
	else()			else()
	set(MLIR_CUDA_CONVERSIONS_ENABLED 0)			set(MLIR_CUDA_CONVERSIONS_ENABLED 0)
	endif()			endif()
	# TODO: we should use a config.h file like LLVM does			# TODO: we should use a config.h file like LLVM does
	add_definitions(-DMLIR_CUDA_CONVERSIONS_ENABLED=${MLIR_CUDA_CONVERSIONS_ENABLED})			add_definitions(-DMLIR_CUDA_CONVERSIONS_ENABLED=${MLIR_CUDA_CONVERSIONS_ENABLED})

				# Build the ROCm conversions and run the corresponding tests only if the
				# AMDGPU backend is among the LLVM targets being built.
				if ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
				set(MLIR_ROCM_CONVERSIONS_ENABLED 1)
				else()
				set(MLIR_ROCM_CONVERSIONS_ENABLED 0)
				endif()
				# Expose the flag (0 or 1) to C++ sources as a preprocessor definition.
				add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ROCM_CONVERSIONS_ENABLED})

	set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner")			set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner")
	set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner")			set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner")

	option(MLIR_INCLUDE_TESTS			option(MLIR_INCLUDE_TESTS
	"Generate build targets for the MLIR unit tests."			"Generate build targets for the MLIR unit tests."
	${LLVM_INCLUDE_TESTS})			${LLVM_INCLUDE_TESTS})

	include_directories( "include")			include_directories( "include")
	▲ Show 20 Lines • Show All 61 Lines • Show Last 20 Lines

mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h

	//===- GPUCommonPass.h - MLIR GPU runtime support -------------------------===//			//===- GPUCommonPass.h - MLIR GPU runtime support -------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	#ifndef MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_			#ifndef MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_
	#define MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_			#define MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_

	#include "mlir/Support/LLVM.h"			#include "mlir/Support/LLVM.h"
				#include "llvm/IR/Module.h"
				rriddleUnsubmitted Done Reply Inline Actions Is this header really necessary? rriddle: Is this header really necessary?
				whchungAuthorUnsubmitted Done Reply Inline Actions @rriddle it's necessary so `llvm::Module` is visible. Forward declaration is not possible because `sizeof(llvm::Module)` would be used in one of the unit test. whchung: @rriddle it's necessary so `llvm::Module` is visible. Forward declaration is not possible…
	#include <functional>			#include <functional>
	#include <memory>			#include <memory>
	#include <string>			#include <string>
	#include <vector>			#include <vector>

	namespace mlir {			namespace mlir {

	class Location;			class Location;
				class LogicalResult;
	class ModuleOp;			class ModuleOp;
				class Operation;

	template <typename T>			template <typename T>
	class OperationPass;			class OperationPass;

				namespace gpu {
				class GPUModuleOp;
				} // namespace gpu

				namespace LLVM {
				class LLVMDialect;
				} // namespace LLVM

				using OwnedBlob = std::unique_ptr<std::vector<char>>;
				using BlobGenerator =
				std::function<OwnedBlob(const std::string &, Location, StringRef)>;
				using InitBackendCallback = std::function<LogicalResult()>;
				herhutUnsubmitted Done Reply Inline Actions Do we need to actually do this in this pass or could we do this in the code that constructs the pipeline? If the calling context has to provide this callback, it could also just call the init itself. herhut: Do we need to actually do this in this pass or could we do this in the code that constructs the…
				using LoweringCallback =
				herhutUnsubmitted Done Reply Inline Actions Could this be a `std::function<std::unique_ptr<llvm::Module>(Operation *)>` instead, signalling failure by returning a `nullptr`? herhut: Could this be a `std::function<std::unique_ptr<llvm::Module>(Operation *)>` instead, signalling…
				std::function<LogicalResult(Operation *, std::unique_ptr<llvm::Module> &)>;

	/// Creates a pass to convert a gpu.launch_func operation into a sequence of			/// Creates a pass to convert a gpu.launch_func operation into a sequence of
	/// GPU runtime calls.			/// GPU runtime calls.
	///			///
	/// This pass does not generate code to call GPU runtime APIs directly but			/// This pass does not generate code to call GPU runtime APIs directly but
	/// instead uses a small wrapper library that exports a stable and conveniently			/// instead uses a small wrapper library that exports a stable and conveniently
	/// typed ABI on top of GPU runtimes such as CUDA or ROCm (HIP).			/// typed ABI on top of GPU runtimes such as CUDA or ROCm (HIP).
	std::unique_ptr<OperationPass<ModuleOp>>			std::unique_ptr<OperationPass<ModuleOp>>
	createConvertGpuLaunchFuncToGpuRuntimeCallsPass();			createConvertGpuLaunchFuncToGpuRuntimeCallsPass();

				/// Creates a pass to convert kernel functions into GPU target object blobs.
				///
				/// This transformation takes the body of each function that is annotated with
				/// the 'gpu.kernel' attribute, copies it to a new LLVM module, compiles the
				/// module with help of the GPU backend to target object and then invokes
				herhutUnsubmitted Done Reply Inline Actions Nit: `to target object` herhut: Nit: `to target object`
				/// the provided blobGenerator to produce a binary blob. Such blob is then
				/// attached as a string attribute to the kernel function.
				///
				/// Following callbacks are to be provided by user:
				/// - initBackendCallback : initialize corresponding LLVM backend.
				/// - loweringCallback : lower the module to an LLVM module.
				/// - blobGenerator : build a blob executable on target GPU.
				///
				/// Information wrt LLVM backend are to be supplied by user:
				/// - triple : target triple to be used.
				/// - targetChip : mcpu to be used.
				/// - features : target-specific features to be used.
				///
				/// Information about result attribute is to be specified by user:
				/// - gpuBinaryAnnotation : the name of the attribute which contains the blob.
				///
				/// After the transformation, the body of the kernel function is removed (i.e.,
				/// it is turned into a declaration).
				std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
				createConvertGPUKernelToBlobPass(InitBackendCallback initBackendCallback,
				LoweringCallback loweringCallback,
				BlobGenerator blobGenerator, StringRef triple,
				StringRef targetChip, StringRef features,
				StringRef gpuBinaryAnnotation);

	} // namespace mlir			} // namespace mlir

	#endif // MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_			#endif // MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_

mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h

This file was deleted.

	//===- GPUToCUDAPass.h - MLIR CUDA runtime support --------------- C++ --===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	#ifndef MLIR_CONVERSION_GPUTOCUDA_GPUTOCUDAPASS_H_
	#define MLIR_CONVERSION_GPUTOCUDA_GPUTOCUDAPASS_H_

	#include "mlir/Support/LLVM.h"
	#include <functional>
	#include <memory>
	#include <string>
	#include <vector>

	namespace mlir {

	class Location;
	class ModuleOp;

	template <typename T> class OperationPass;

	namespace gpu {
	class GPUModuleOp;
	} // namespace gpu

	namespace LLVM {
	class LLVMDialect;
	} // namespace LLVM

	using OwnedCubin = std::unique_ptr<std::vector<char>>;
	using CubinGenerator =
	std::function<OwnedCubin(const std::string &, Location, StringRef)>;

	/// Creates a pass to convert kernel functions into CUBIN blobs.
	///
	/// This transformation takes the body of each function that is annotated with
	/// the 'nvvm.kernel' attribute, copies it to a new LLVM module, compiles the
	/// module with help of the nvptx backend to PTX and then invokes the provided
	/// cubinGenerator to produce a binary blob (the cubin). Such blob is then
	/// attached as a string attribute named 'nvvm.cubin' to the kernel function.
	/// After the transformation, the body of the kernel function is removed (i.e.,
	/// it is turned into a declaration).
	std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
	createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator);

	} // namespace mlir

	#endif // MLIR_CONVERSION_GPUTOCUDA_GPUTOCUDAPASS_H_

mlir/include/mlir/InitAllPasses.h

	Show All 10 Lines
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef MLIR_INITALLPASSES_H_			#ifndef MLIR_INITALLPASSES_H_
	#define MLIR_INITALLPASSES_H_			#define MLIR_INITALLPASSES_H_

	#include "mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h"			#include "mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h"
	#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"			#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
	#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
	#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"			#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
	#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"			#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
	#include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h"			#include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h"
	#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"			#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
	#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"			#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
	#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"			#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
	#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"			#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
	#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"			#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
	▲ Show 20 Lines • Show All 73 Lines • Show Last 20 Lines

mlir/lib/Conversion/CMakeLists.txt

	add_subdirectory(AffineToStandard)			add_subdirectory(AffineToStandard)
	add_subdirectory(AVX512ToLLVM)			add_subdirectory(AVX512ToLLVM)
	add_subdirectory(GPUCommon)			add_subdirectory(GPUCommon)
	add_subdirectory(GPUToCUDA)
	add_subdirectory(GPUToNVVM)			add_subdirectory(GPUToNVVM)
	add_subdirectory(GPUToROCDL)			add_subdirectory(GPUToROCDL)
	add_subdirectory(GPUToSPIRV)			add_subdirectory(GPUToSPIRV)
	add_subdirectory(GPUToVulkan)			add_subdirectory(GPUToVulkan)
	add_subdirectory(LinalgToLLVM)			add_subdirectory(LinalgToLLVM)
	add_subdirectory(LinalgToSPIRV)			add_subdirectory(LinalgToSPIRV)
	add_subdirectory(LinalgToStandard)			add_subdirectory(LinalgToStandard)
	add_subdirectory(SCFToGPU)			add_subdirectory(SCFToGPU)
	add_subdirectory(SCFToStandard)			add_subdirectory(SCFToStandard)
	add_subdirectory(StandardToLLVM)			add_subdirectory(StandardToLLVM)
	add_subdirectory(StandardToSPIRV)			add_subdirectory(StandardToSPIRV)
	add_subdirectory(VectorToLLVM)			add_subdirectory(VectorToLLVM)
	add_subdirectory(VectorToSCF)			add_subdirectory(VectorToSCF)

mlir/lib/Conversion/GPUCommon/CMakeLists.txt

	set(SOURCES
	ConvertLaunchFuncToRuntimeCalls.cpp
	)

	add_mlir_conversion_library(MLIRGPUtoGPURuntimeTransforms			add_mlir_conversion_library(MLIRGPUtoGPURuntimeTransforms
	${SOURCES}			ConvertLaunchFuncToRuntimeCalls.cpp
				ConvertKernelFuncToBlob.cpp

	DEPENDS			DEPENDS
	MLIRConversionPassIncGen			MLIRConversionPassIncGen
	intrinsics_gen			intrinsics_gen

	LINK_COMPONENTS			LINK_COMPONENTS
	Core			Core

	LINK_LIBS PUBLIC			LINK_LIBS PUBLIC
	MLIRGPU			MLIRGPU
	MLIRIR			MLIRIR
	MLIRLLVMIR			MLIRLLVMIR
	MLIRPass			MLIRPass
	MLIRSupport			MLIRSupport
	)			)

mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp

This file was added.

				//===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				//
				// This file implements a pass to convert gpu kernel functions into a
				// corresponding binary blob that can be executed on a GPU. Currently
				// only translates the function itself but no dependencies.
				//
				//===----------------------------------------------------------------------===//

				#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"

				#include "mlir/Dialect/GPU/GPUDialect.h"
				#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
				#include "mlir/IR/Attributes.h"
				#include "mlir/IR/Builders.h"
				#include "mlir/IR/Function.h"
				#include "mlir/IR/Module.h"
				#include "mlir/Pass/Pass.h"
				#include "mlir/Pass/PassRegistry.h"
				#include "mlir/Support/LogicalResult.h"

				#include "llvm/ADT/Optional.h"
				#include "llvm/ADT/Twine.h"
				#include "llvm/IR/Constants.h"
				#include "llvm/IR/LegacyPassManager.h"
				#include "llvm/IR/Module.h"
				#include "llvm/Support/Error.h"
				#include "llvm/Support/Mutex.h"
				#include "llvm/Support/TargetRegistry.h"
				#include "llvm/Support/TargetSelect.h"
				#include "llvm/Target/TargetMachine.h"

				using namespace mlir;

				namespace {

				/// A pass converting tagged kernel modules to a blob with target instructions.
				///
				/// If tagged as a kernel module, each contained function is translated to
				/// user-specified IR. A user provided BlobGenerator then compiles the IR to
				/// GPU binary code, which is then attached as an attribute to the function.
				/// The function body is erased.
				class GpuKernelToBlobPass
				: public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
				public:
				/// Builds the pass from the user-supplied callbacks and target description.
				///
				/// Each argument initializes the member of the same name, except that
				/// `gpuBinaryAnnotation` is kept in `blobAnnotation` and `triple` is used to
				/// construct the `llvm::Triple` member.
				GpuKernelToBlobPass(InitBackendCallback initBackendCallback,
				LoweringCallback loweringCallback,
				BlobGenerator blobGenerator, StringRef triple,
				StringRef targetChip, StringRef features,
				StringRef gpuBinaryAnnotation)
				: initBackendCallback(initBackendCallback),
				loweringCallback(loweringCallback), blobGenerator(blobGenerator),
				triple(triple), targetChip(targetChip), features(features),
				blobAnnotation(gpuBinaryAnnotation) {}

				void runOnOperation() override {
				gpu::GPUModuleOp module = getOperation();

				// Lock access to the llvm context.
				llvm::sys::SmartScopedLock<true> scopedLock(
				module.getContext()
				->getRegisteredDialect<LLVM::LLVMDialect>()
				->getLLVMContextMutex());

				// Initialize LLVM backend.
				if (!succeeded(initBackendCallback()))
				return signalPassFailure();

				// Lower the module to a llvm module.
				std::unique_ptr<llvm::Module> llvmModule = nullptr;
				if (!succeeded(loweringCallback(module, llvmModule)))
				return signalPassFailure();

				// Translate the llvm module to a target blob and attach the result as
				// attribute to the module.
				if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
				*llvmModule, module.getLoc(), module.getName()))
				module.setAttr(blobAnnotation, blobAttr);
				else
				signalPassFailure();
				}

				private:
				std::string translateModuleToISA(llvm::Module &module,
				llvm::TargetMachine &targetMachine);

				/// Converts llvmModule to a blob with target instructions using the
				herhutUnsubmitted Done Reply Inline Actions `lob` -> `blob` herhut: `lob` -> `blob`
				/// user-provided generator. Location is used for error reporting and name is
				/// forwarded to the blob generator to use in its logging mechanisms.
				OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
				StringRef name);

				/// Translates llvmModule to a blob with target instructions and returns the
				/// result as attribute.
				StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
				Location loc, StringRef name);

				/// User-provided hooks: backend initialization, lowering of the GPU module
				/// to an LLVM module, and generation of the final binary blob.
				InitBackendCallback initBackendCallback;
				LoweringCallback loweringCallback;
				BlobGenerator blobGenerator;

				/// Target triple handed to the target registry and target machine.
				llvm::Triple triple;

				/// Target chip, feature string and name of the result attribute. These are
				/// owned copies: the factory receives them as StringRef, and the pass
				/// outlives that call, so holding the references would dangle whenever the
				/// caller passes temporaries.
				std::string targetChip;
				std::string features;
				std::string blobAnnotation;
				};

				} // anonymous namespace

				/// Compiles `module` to assembly text for `targetMachine` and returns it.
				///
				/// The module is cloned into a fresh LLVMContext before code generation.
				/// Returns an empty string if the target machine cannot emit assembly.
				std::string
				GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
				                                          llvm::TargetMachine &targetMachine) {
				  std::string targetISA;
				  {
				    // Clone the llvm module into a new context to enable concurrent
				    // compilation with multiple threads.
				    llvm::LLVMContext llvmContext;
				    auto clone = LLVM::cloneModuleIntoNewContext(&llvmContext, &module);

				    llvm::raw_string_ostream stream(targetISA);
				    llvm::buffer_ostream pstream(stream);
				    llvm::legacy::PassManager codegenPasses;
				    // addPassesToEmitFile returns true when the requested file type is not
				    // supported; running the pass manager would not emit anything then.
				    if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
				                                          llvm::CGFT_AssemblyFile))
				      return {};
				    codegenPasses.run(*clone);
				    // Leaving this scope destroys the streams, which flushes the emitted
				    // text into targetISA before it is returned.
				  }

				  return targetISA;
				}

				/// Converts `llvmModule` to a binary blob for the configured target.
				///
				/// Looks up the target for `triple`, creates a target machine for
				/// `targetChip`/`features`, stamps the machine's data layout on the module,
				/// compiles it to ISA text and hands that text to the user's blobGenerator.
				/// `loc` is used for error reporting; `name` is forwarded to the generator.
				/// Returns a null blob (after emitting an error) if the target or the
				/// target machine cannot be created.
				OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
				                                                   Location loc,
				                                                   StringRef name) {
				  std::unique_ptr<llvm::TargetMachine> targetMachine;
				  {
				    std::string error;
				    const llvm::Target *target =
				        llvm::TargetRegistry::lookupTarget("", triple, error);
				    if (target == nullptr) {
				      emitError(loc, "cannot initialize target triple");
				      return {};
				    }
				    targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
				                                                    features, {}, {}));
				  }

				  // createTargetMachine may return null; check before the dereferences
				  // below.
				  if (!targetMachine) {
				    emitError(loc, "cannot create target machine");
				    return {};
				  }

				  // Set the data layout of the llvm module to match what the target needs.
				  llvmModule.setDataLayout(targetMachine->createDataLayout());

				  auto targetISA = translateModuleToISA(llvmModule, *targetMachine);

				  return blobGenerator(targetISA, loc, name);
				}

				/// Compiles `llvmModule` to a blob and wraps the blob bytes in a StringAttr
				/// created in the context of `loc`. Returns a null attribute when no blob
				/// could be produced.
				StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
				    llvm::Module &llvmModule, Location loc, StringRef name) {
				  OwnedBlob binary = convertModuleToBlob(llvmModule, loc, name);
				  if (binary == nullptr)
				    return {};

				  // View the blob as raw bytes; the attribute is built from this view.
				  StringRef bytes(binary->data(), binary->size());
				  return StringAttr::get(bytes, loc->getContext());
				}

				/// Factory for GpuKernelToBlobPass: forwards all callbacks and target
				/// settings unchanged to the pass constructor.
				std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
				mlir::createConvertGPUKernelToBlobPass(InitBackendCallback initBackendCallback,
				                                       LoweringCallback loweringCallback,
				                                       BlobGenerator blobGenerator,
				                                       StringRef triple, StringRef targetChip,
				                                       StringRef features,
				                                       StringRef gpuBinaryAnnotation) {
				  std::unique_ptr<OperationPass<gpu::GPUModuleOp>> pass =
				      std::make_unique<GpuKernelToBlobPass>(
				          initBackendCallback, loweringCallback, blobGenerator, triple,
				          targetChip, features, gpuBinaryAnnotation);
				  return pass;
				}

mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt

This file was deleted.

	set(LLVM_OPTIONAL_SOURCES
	ConvertKernelFuncToCubin.cpp
	)

	if (MLIR_CUDA_CONVERSIONS_ENABLED)
	set(NVPTX_LIBS
	MC
	NVPTXCodeGen
	NVPTXDesc
	NVPTXInfo
	)

	add_mlir_conversion_library(MLIRGPUtoCUDATransforms
	ConvertKernelFuncToCubin.cpp

	DEPENDS
	MLIRConversionPassIncGen
	intrinsics_gen

	LINK_COMPONENTS
	Core
	${NVPTX_LIBS}

	LINK_LIBS PUBLIC
	MLIRGPU
	MLIRIR
	MLIRLLVMIR
	MLIRNVVMIR
	MLIRPass
	MLIRSupport
	MLIRTargetNVVMIR
	)
	else()
	add_library(MLIRGPUtoCUDATransforms INTERFACE IMPORTED GLOBAL)
	endif()

mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp

This file was deleted.

	//===- ConvertKernelFuncToCubin.cpp - MLIR GPU lowering passes ------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements a pass to convert gpu kernel functions into a
	// corresponding binary blob that can be executed on a CUDA GPU. Currently
	// only translates the function itself but no dependencies.
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"

	#include "mlir/Dialect/GPU/GPUDialect.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
	#include "mlir/IR/Attributes.h"
	#include "mlir/IR/Builders.h"
	#include "mlir/IR/Function.h"
	#include "mlir/IR/Module.h"
	#include "mlir/Pass/Pass.h"
	#include "mlir/Pass/PassRegistry.h"
	#include "mlir/Support/LogicalResult.h"
	#include "mlir/Target/NVVMIR.h"

	#include "llvm/ADT/Optional.h"
	#include "llvm/ADT/Twine.h"
	#include "llvm/IR/Constants.h"
	#include "llvm/IR/LegacyPassManager.h"
	#include "llvm/IR/Module.h"
	#include "llvm/Support/Error.h"
	#include "llvm/Support/Mutex.h"
	#include "llvm/Support/TargetRegistry.h"
	#include "llvm/Support/TargetSelect.h"
	#include "llvm/Target/TargetMachine.h"

	using namespace mlir;

	namespace {
	// TODO(herhut): Move to shared location.
	static constexpr const char *kCubinAnnotation = "nvvm.cubin";

	/// A pass converting tagged kernel modules to cubin blobs.
	///
	/// If tagged as a kernel module, each contained function is translated to NVVM
	/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
	/// GPU binary code, which is then attached as an attribute to the function. The
	/// function body is erased.
	class GpuKernelToCubinPass
	: public PassWrapper<GpuKernelToCubinPass,
	OperationPass<gpu::GPUModuleOp>> {
	public:
	GpuKernelToCubinPass(CubinGenerator cubinGenerator)
	: cubinGenerator(cubinGenerator) {}

	void runOnOperation() override {
	gpu::GPUModuleOp module = getOperation();

	// Lock access to the llvm context.
	llvm::sys::SmartScopedLock<true> scopedLock(
	module.getContext()
	->getRegisteredDialect<LLVM::LLVMDialect>()
	->getLLVMContextMutex());

	// Make sure the NVPTX target is initialized.
	LLVMInitializeNVPTXTarget();
	LLVMInitializeNVPTXTargetInfo();
	LLVMInitializeNVPTXTargetMC();
	LLVMInitializeNVPTXAsmPrinter();

	auto llvmModule = translateModuleToNVVMIR(module);
	if (!llvmModule)
	return signalPassFailure();

	// Translate the module to CUBIN and attach the result as attribute to the
	// module.
	if (auto cubinAttr = translateGPUModuleToCubinAnnotation(
	*llvmModule, module.getLoc(), module.getName()))
	module.setAttr(kCubinAnnotation, cubinAttr);
	else
	signalPassFailure();
	}

	private:
	std::string translateModuleToPtx(llvm::Module &module,
	llvm::TargetMachine &target_machine);

	/// Converts llvmModule to cubin using the user-provided generator. Location
	/// is used for error reporting and name is forwarded to the CUBIN generator
	/// to use in its logging mechanisms.
	OwnedCubin convertModuleToCubin(llvm::Module &llvmModule, Location loc,
	StringRef name);

	/// Translates llvmModule to cubin and returns the result as attribute.
	StringAttr translateGPUModuleToCubinAnnotation(llvm::Module &llvmModule,
	Location loc, StringRef name);

	CubinGenerator cubinGenerator;
	};

	} // anonymous namespace

	std::string GpuKernelToCubinPass::translateModuleToPtx(
	llvm::Module &module, llvm::TargetMachine &target_machine) {
	std::string ptx;
	{
	// Clone the llvm module into a new context to enable concurrent compilation
	// with multiple threads.
	// TODO(zinenko): Reevaluate model of ownership of LLVMContext in
	// LLVMDialect.
	llvm::LLVMContext llvmContext;
	auto clone = LLVM::cloneModuleIntoNewContext(&llvmContext, &module);

	llvm::raw_string_ostream stream(ptx);
	llvm::buffer_ostream pstream(stream);
	llvm::legacy::PassManager codegen_passes;
	target_machine.addPassesToEmitFile(codegen_passes, pstream, nullptr,
	llvm::CGFT_AssemblyFile);
	codegen_passes.run(*clone);
	}

	return ptx;
	}

	OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
	Location loc,
	StringRef name) {
	std::unique_ptr<llvm::TargetMachine> targetMachine;
	{
	std::string error;
	// TODO(herhut): Make triple configurable.
	constexpr const char *cudaTriple = "nvptx64-nvidia-cuda";
	llvm::Triple triple(cudaTriple);
	const llvm::Target *target =
	llvm::TargetRegistry::lookupTarget("", triple, error);
	if (target == nullptr) {
	emitError(loc, "cannot initialize target triple");
	return {};
	}
	targetMachine.reset(
	target->createTargetMachine(triple.str(), "sm_35", "+ptx60", {}, {}));
	}

	// Set the data layout of the llvm module to match what the ptx target needs.
	llvmModule.setDataLayout(targetMachine->createDataLayout());

	auto ptx = translateModuleToPtx(llvmModule, *targetMachine);

	return cubinGenerator(ptx, loc, name);
	}

	StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
	llvm::Module &llvmModule, Location loc, StringRef name) {
	auto cubin = convertModuleToCubin(llvmModule, loc, name);
	if (!cubin)
	return {};
	return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
	}

	std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
	mlir::createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator) {
	return std::make_unique<GpuKernelToCubinPass>(cubinGenerator);
	}

mlir/test/Conversion/GPUToROCm/lit.local.cfg

This file was added.

				# Mark the tests in this directory as unsupported unless the site config
				# sets config.run_rocm_tests to a true value.
				if not config.run_rocm_tests:
				config.unsupported = True

mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir

This file was added.

				// RUN: mlir-opt %s --test-kernel-to-hsaco -split-input-file | FileCheck %s

				// CHECK: attributes {rocdl.hsaco = "HSACO"}
				gpu.module @foo {
				llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm<"float*">)
				// CHECK: attributes {gpu.kernel}
				attributes { gpu.kernel } {
				llvm.return
				}
				}

				// -----

				gpu.module @bar {
				// CHECK: func @kernel_a
				llvm.func @kernel_a()
				attributes { gpu.kernel } {
				llvm.return
				}

				// CHECK: func @kernel_b
				llvm.func @kernel_b()
				attributes { gpu.kernel } {
				llvm.return
				}
				}

mlir/test/lib/Transforms/CMakeLists.txt

		# LLVM components required by the cubin test pass. The list is only
		# defined when the CUDA conversions are enabled.
		if (MLIR_CUDA_CONVERSIONS_ENABLED)
		set(NVPTX_LIBS
		MC
		NVPTXCodeGen
		NVPTXDesc
		NVPTXInfo
		)
		endif()

		# LLVM components required by the hsaco test pass. The list is only
		# defined when the ROCm conversions are enabled. MC appears in both lists.
		if (MLIR_ROCM_CONVERSIONS_ENABLED)
		set(AMDGPU_LIBS
		MC
		AMDGPUCodeGen
		AMDGPUDesc
		AMDGPUInfo
		)
		endif()

# Exclude tests from libMLIR.so		# Exclude tests from libMLIR.so
add_mlir_library(MLIRTestTransforms		add_mlir_library(MLIRTestTransforms
TestAllReduceLowering.cpp		TestAllReduceLowering.cpp
TestBufferPlacement.cpp		TestBufferPlacement.cpp
TestCallGraph.cpp		TestCallGraph.cpp
TestConstantFold.cpp		TestConstantFold.cpp
TestConvertGPUKernelToCubin.cpp		TestConvertGPUKernelToCubin.cpp
		TestConvertGPUKernelToHsaco.cpp
TestDominance.cpp		TestDominance.cpp
TestLoopFusion.cpp		TestLoopFusion.cpp
TestGpuMemoryPromotion.cpp		TestGpuMemoryPromotion.cpp
TestGpuParallelLoopMapping.cpp		TestGpuParallelLoopMapping.cpp
TestInlining.cpp		TestInlining.cpp
TestLinalgTransforms.cpp		TestLinalgTransforms.cpp
TestLiveness.cpp		TestLiveness.cpp
TestLoopMapping.cpp		TestLoopMapping.cpp
Show All 10 Lines	add_mlir_library(MLIRTestTransforms

ADDITIONAL_HEADER_DIRS		ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms		${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms

DEPENDS		DEPENDS
MLIRStandardOpsIncGen		MLIRStandardOpsIncGen
MLIRTestVectorTransformPatternsIncGen		MLIRTestVectorTransformPatternsIncGen

		LINK_COMPONENTS
		${AMDGPU_LIBS}
		${NVPTX_LIBS}

LINK_LIBS PUBLIC		LINK_LIBS PUBLIC
MLIRAffineOps		MLIRAffineOps
MLIRAnalysis		MLIRAnalysis
MLIREDSC		MLIREDSC
MLIRGPU		MLIRGPU
MLIRGPUtoCUDATransforms		MLIRGPUtoGPURuntimeTransforms
MLIRLinalgOps		MLIRLinalgOps
MLIRLinalgTransforms		MLIRLinalgTransforms
		MLIRNVVMIR
MLIRSCF		MLIRSCF
MLIRGPU		MLIRGPU
MLIRPass		MLIRPass
		MLIRROCDLIR
MLIRStandardOpsTransforms		MLIRStandardOpsTransforms
		MLIRTargetNVVMIR
		MLIRTargetROCDLIR
MLIRTestDialect		MLIRTestDialect
MLIRTransformUtils		MLIRTransformUtils
MLIRVectorToSCF		MLIRVectorToSCF
MLIRVector		MLIRVector
)		)

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../Dialect/Test)		include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../Dialect/Test)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../Dialect/Test)		include_directories(${CMAKE_CURRENT_BINARY_DIR}/../Dialect/Test)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../DeclarativeTransforms)		include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../DeclarativeTransforms)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../DeclarativeTransforms)		include_directories(${CMAKE_CURRENT_BINARY_DIR}/../DeclarativeTransforms)

mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp

	//===- TestConvertGPUKernelToCubin.cpp - Test gpu kernel cubin lowering ---===//			//===- TestConvertGPUKernelToCubin.cpp - Test gpu kernel cubin lowering ---===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"			#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
	#include "mlir/Pass/Pass.h"			#include "mlir/Pass/Pass.h"
	#include "mlir/Pass/PassManager.h"			#include "mlir/Pass/PassManager.h"
				#include "mlir/Target/NVVMIR.h"
				#include "llvm/Support/TargetSelect.h"
	using namespace mlir;			using namespace mlir;

	#if MLIR_CUDA_CONVERSIONS_ENABLED			#if MLIR_CUDA_CONVERSIONS_ENABLED
	static OwnedCubin compilePtxToCubinForTesting(const std::string &, Location,			static LogicalResult initNVPTXBackendCallback() {
				LLVMInitializeNVPTXTarget();
				LLVMInitializeNVPTXTargetInfo();
				LLVMInitializeNVPTXTargetMC();
				LLVMInitializeNVPTXAsmPrinter();
				return success();
				}

				/// Lowering callback for the cubin test pipeline: translates `m` to NVVM IR
				/// and stores the result in `llvmModule`. Succeeds iff a module was produced.
				static LogicalResult
				compileModuleToNVVMIR(Operation *m, std::unique_ptr<llvm::Module> &llvmModule) {
				  llvmModule = translateModuleToNVVMIR(m);
				  return llvmModule ? success() : failure();
				}

				static OwnedBlob compilePtxToCubinForTesting(const std::string &, Location,
	StringRef) {			StringRef) {
	const char data[] = "CUBIN";			const char data[] = "CUBIN";
	return std::make_unique<std::vector<char>>(data, data + sizeof(data) - 1);			return std::make_unique<std::vector<char>>(data, data + sizeof(data) - 1);
	}			}

	namespace mlir {			namespace mlir {
	void registerTestConvertGPUKernelToCubinPass() {			void registerTestConvertGPUKernelToCubinPass() {
	PassPipelineRegistration<>("test-kernel-to-cubin",			PassPipelineRegistration<>(
				"test-kernel-to-cubin",
	"Convert all kernel functions to CUDA cubin blobs",			"Convert all kernel functions to CUDA cubin blobs",
	[](OpPassManager &pm) {			[](OpPassManager &pm) {
	pm.addPass(createConvertGPUKernelToCubinPass(			pm.addPass(createConvertGPUKernelToBlobPass(
	compilePtxToCubinForTesting));			initNVPTXBackendCallback, compileModuleToNVVMIR,
				compilePtxToCubinForTesting, "nvptx64-nvidia-cuda", "sm_35",
				"+ptx60", "nvvm.cubin"));
	});			});
	}			}
	} // namespace mlir			} // namespace mlir
	#endif			#endif

mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp

This file was added.

				//===- TestConvertGPUKernelToHsaco.cpp - Test gpu kernel hsaco lowering ---===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
				#include "mlir/Pass/Pass.h"
				#include "mlir/Pass/PassManager.h"
				#include "mlir/Target/ROCDLIR.h"
				#include "llvm/Support/TargetSelect.h"
				using namespace mlir;

				#if MLIR_ROCM_CONVERSIONS_ENABLED
				/// Backend-initialization callback for the hsaco test pipeline: initializes
				/// the AMDGPU target, target info, MC layer and asm printer. Always returns
				/// success.
				static LogicalResult initAMDGPUBackendCallback() {
				LLVMInitializeAMDGPUTarget();
				LLVMInitializeAMDGPUTargetInfo();
				LLVMInitializeAMDGPUTargetMC();
				LLVMInitializeAMDGPUAsmPrinter();
				return success();
				}

				/// Lowering callback for the hsaco test pipeline: translates `m` to ROCDL IR
				/// and stores the result in `llvmModule`. Succeeds iff a module was produced.
				static LogicalResult
				compileModuleToROCDLIR(Operation *m,
				                       std::unique_ptr<llvm::Module> &llvmModule) {
				  llvmModule = translateModuleToROCDLIR(m);
				  return llvmModule ? success() : failure();
				}

				/// Fake blob generator for testing: ignores all arguments and returns the
				/// five bytes "HSACO" (no terminating NUL) as the blob.
				static OwnedBlob compileIsaToHsacoForTesting(const std::string &, Location,
				                                             StringRef) {
				  const std::string payload = "HSACO";
				  return std::make_unique<std::vector<char>>(payload.begin(), payload.end());
				}

				namespace mlir {
				/// Registers the "test-kernel-to-hsaco" pipeline, which adds the
				/// kernel-to-blob pass configured with the AMDGPU test callbacks, the
				/// "amdgcn-amd-amdhsa" triple, chip "gfx900", features "-code-object-v3" and
				/// the result attribute name "rocdl.hsaco".
				void registerTestConvertGPUKernelToHsacoPass() {
				  auto buildPipeline = [](OpPassManager &pm) {
				    pm.addPass(createConvertGPUKernelToBlobPass(
				        initAMDGPUBackendCallback, compileModuleToROCDLIR,
				        compileIsaToHsacoForTesting, "amdgcn-amd-amdhsa", "gfx900",
				        "-code-object-v3", "rocdl.hsaco"));
				  };
				  PassPipelineRegistration<>(
				      "test-kernel-to-hsaco",
				      "Convert all kernel functions to ROCm hsaco blobs", buildPipeline);
				}
				} // namespace mlir
				#endif

mlir/test/lit.site.cfg.py.in

	Show All 32 Lines
	config.mlir_obj_root = "@MLIR_BINARY_DIR@"			config.mlir_obj_root = "@MLIR_BINARY_DIR@"
	config.mlir_runner_utils_dir = "@MLIR_RUNNER_UTILS_DIR@"			config.mlir_runner_utils_dir = "@MLIR_RUNNER_UTILS_DIR@"
	config.mlir_tools_dir = "@MLIR_TOOLS_DIR@"			config.mlir_tools_dir = "@MLIR_TOOLS_DIR@"
	config.linalg_test_lib_dir = "@MLIR_DIALECT_LINALG_INTEGRATION_TEST_LIB_DIR@"			config.linalg_test_lib_dir = "@MLIR_DIALECT_LINALG_INTEGRATION_TEST_LIB_DIR@"
	config.build_examples = @LLVM_BUILD_EXAMPLES@			config.build_examples = @LLVM_BUILD_EXAMPLES@
	config.run_cuda_tests = @MLIR_CUDA_CONVERSIONS_ENABLED@			config.run_cuda_tests = @MLIR_CUDA_CONVERSIONS_ENABLED@
	config.cuda_wrapper_library_dir = "@MLIR_CUDA_WRAPPER_LIBRARY_DIR@"			config.cuda_wrapper_library_dir = "@MLIR_CUDA_WRAPPER_LIBRARY_DIR@"
	config.enable_cuda_runner = @MLIR_CUDA_RUNNER_ENABLED@			config.enable_cuda_runner = @MLIR_CUDA_RUNNER_ENABLED@
				config.run_rocm_tests = @MLIR_ROCM_CONVERSIONS_ENABLED@
	config.vulkan_wrapper_library_dir = "@MLIR_VULKAN_WRAPPER_LIBRARY_DIR@"			config.vulkan_wrapper_library_dir = "@MLIR_VULKAN_WRAPPER_LIBRARY_DIR@"
	config.enable_vulkan_runner = @MLIR_VULKAN_RUNNER_ENABLED@			config.enable_vulkan_runner = @MLIR_VULKAN_RUNNER_ENABLED@

	# Support substitution of the tools_dir with user parameters. This is			# Support substitution of the tools_dir with user parameters. This is
	# used when we can't determine the tool dir at configuration time.			# used when we can't determine the tool dir at configuration time.
	try:			try:
	config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params			config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
	config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params			config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params
	Show All 10 Lines

mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp

Show All 9 Lines
// translating MLIR to NVVM/LLVM IR before JIT-compiling and executing the		// translating MLIR to NVVM/LLVM IR before JIT-compiling and executing the
// latter.		// latter.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"

#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"		#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"		#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"		#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"		#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"		#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"		#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"		#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"		#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/ExecutionEngine/JitRunner.h"		#include "mlir/ExecutionEngine/JitRunner.h"
#include "mlir/ExecutionEngine/OptUtils.h"		#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/IR/Function.h"		#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"		#include "mlir/IR/Module.h"
#include "mlir/InitAllDialects.h"		#include "mlir/InitAllDialects.h"
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"		#include "mlir/Pass/PassManager.h"
		#include "mlir/Target/NVVMIR.h"
#include "mlir/Transforms/DialectConversion.h"		#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"		#include "mlir/Transforms/Passes.h"
#include "llvm/Support/InitLLVM.h"		#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetSelect.h"		#include "llvm/Support/TargetSelect.h"

#include "cuda.h"		#include "cuda.h"

using namespace mlir;		using namespace mlir;
Show All 11 Lines	#define RETURN_ON_CUDA_ERROR(expr, msg) \
{ \		{ \
auto _cuda_error = (expr); \		auto _cuda_error = (expr); \
if (_cuda_error != CUDA_SUCCESS) { \		if (_cuda_error != CUDA_SUCCESS) { \
emit_cuda_error(msg, jitErrorBuffer, _cuda_error, loc); \		emit_cuda_error(msg, jitErrorBuffer, _cuda_error, loc); \
return {}; \		return {}; \
} \		} \
}		}

OwnedCubin compilePtxToCubin(const std::string ptx, Location loc,		static LogicalResult initNVPTXBackend() {
		// Make sure the NVPTX target is initialized.
		LLVMInitializeNVPTXTarget();
		LLVMInitializeNVPTXTargetInfo();
		LLVMInitializeNVPTXTargetMC();
		LLVMInitializeNVPTXAsmPrinter();
		return success();
		}

		static LogicalResult
		compileModuleToNVVMIR(Operation *m, std::unique_ptr<llvm::Module> &llvmModule) {
		llvmModule = translateModuleToNVVMIR(m);
		if (llvmModule)
		return success();
		return failure();
		}

		OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
StringRef name) {		StringRef name) {
char jitErrorBuffer[4096] = {0};		char jitErrorBuffer[4096] = {0};

RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit");		RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit");

// Linking requires a device context.		// Linking requires a device context.
CUdevice device;		CUdevice device;
RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0), "cuDeviceGet");		RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0), "cuDeviceGet");
CUcontext context;		CUcontext context;
Show All 22 Lines	RETURN_ON_CUDA_ERROR(
"cuLinkAddData");		"cuLinkAddData");

void *cubinData;		void *cubinData;
size_t cubinSize;		size_t cubinSize;
RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize),		RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize),
"cuLinkComplete");		"cuLinkComplete");

char *cubinAsChar = static_cast<char *>(cubinData);		char *cubinAsChar = static_cast<char *>(cubinData);
OwnedCubin result =		OwnedBlob result =
std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);		std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);

// This will also destroy the cubin data.		// This will also destroy the cubin data.
RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState), "cuLinkDestroy");		RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState), "cuLinkDestroy");

return result;		return result;
}		}

static LogicalResult runMLIRPasses(ModuleOp m) {		static LogicalResult runMLIRPasses(ModuleOp m) {
PassManager pm(m.getContext());		PassManager pm(m.getContext());
applyPassManagerCLOptions(pm);		applyPassManagerCLOptions(pm);

pm.addPass(createGpuKernelOutliningPass());		pm.addPass(createGpuKernelOutliningPass());
auto &kernelPm = pm.nest<gpu::GPUModuleOp>();		auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
kernelPm.addPass(createStripDebugInfoPass());		kernelPm.addPass(createStripDebugInfoPass());
kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());		kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
kernelPm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));		kernelPm.addPass(createConvertGPUKernelToBlobPass(
		&initNVPTXBackend, &compileModuleToNVVMIR, &compilePtxToCubin,
		"nvptx64-nvidia-cuda", "sm_35", "+ptx60", "nvvm.cubin"));
pm.addPass(createLowerToLLVMPass());		pm.addPass(createLowerToLLVMPass());
pm.addPass(createConvertGpuLaunchFuncToGpuRuntimeCallsPass());		pm.addPass(createConvertGpuLaunchFuncToGpuRuntimeCallsPass());

return pm.run(m);		return pm.run(m);
}		}

int main(int argc, char **argv) {		int main(int argc, char **argv) {
registerPassManagerCLOptions();		registerPassManagerCLOptions();
mlir::registerAllDialects();		mlir::registerAllDialects();
llvm::InitLLVM y(argc, argv);		llvm::InitLLVM y(argc, argv);
llvm::InitializeNativeTarget();		llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();		llvm::InitializeNativeTargetAsmPrinter();
mlir::initializeLLVMPasses();		mlir::initializeLLVMPasses();
return mlir::JitRunnerMain(argc, argv, &runMLIRPasses);		return mlir::JitRunnerMain(argc, argv, &runMLIRPasses);
}		}

mlir/tools/mlir-opt/mlir-opt.cpp

Show All 40 Lines
void registerTestAffineDataCopyPass();		void registerTestAffineDataCopyPass();
void registerTestAllReduceLoweringPass();		void registerTestAllReduceLoweringPass();
void registerTestAffineLoopUnswitchingPass();		void registerTestAffineLoopUnswitchingPass();
void registerTestBufferPlacementPreparationPass();		void registerTestBufferPlacementPreparationPass();
void registerTestLoopPermutationPass();		void registerTestLoopPermutationPass();
void registerTestCallGraphPass();		void registerTestCallGraphPass();
void registerTestConstantFold();		void registerTestConstantFold();
void registerTestConvertGPUKernelToCubinPass();		void registerTestConvertGPUKernelToCubinPass();
		void registerTestConvertGPUKernelToHsacoPass();
void registerTestDominancePass();		void registerTestDominancePass();
void registerTestFunc();		void registerTestFunc();
void registerTestGpuMemoryPromotionPass();		void registerTestGpuMemoryPromotionPass();
void registerTestLinalgTransforms();		void registerTestLinalgTransforms();
void registerTestLivenessPass();		void registerTestLivenessPass();
void registerTestLoopFusion();		void registerTestLoopFusion();
void registerTestLoopMappingPass();		void registerTestLoopMappingPass();
void registerTestLoopUnrollingPass();		void registerTestLoopUnrollingPass();
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	void registerTestPasses() {
registerTestAllReduceLoweringPass();		registerTestAllReduceLoweringPass();
registerTestAffineLoopUnswitchingPass();		registerTestAffineLoopUnswitchingPass();
registerTestLoopPermutationPass();		registerTestLoopPermutationPass();
registerTestCallGraphPass();		registerTestCallGraphPass();
registerTestConstantFold();		registerTestConstantFold();
#if MLIR_CUDA_CONVERSIONS_ENABLED		#if MLIR_CUDA_CONVERSIONS_ENABLED
registerTestConvertGPUKernelToCubinPass();		registerTestConvertGPUKernelToCubinPass();
#endif		#endif
		#if MLIR_ROCM_CONVERSIONS_ENABLED
		registerTestConvertGPUKernelToHsacoPass();
		#endif
registerTestBufferPlacementPreparationPass();		registerTestBufferPlacementPreparationPass();
registerTestDominancePass();		registerTestDominancePass();
registerTestFunc();		registerTestFunc();
registerTestGpuMemoryPromotionPass();		registerTestGpuMemoryPromotionPass();
registerTestLinalgTransforms();		registerTestLinalgTransforms();
registerTestLivenessPass();		registerTestLivenessPass();
registerTestLoopFusion();		registerTestLoopFusion();
registerTestLoopMappingPass();		registerTestLoopMappingPass();
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu][mlir-cuda-runner] Refactor ConvertKernelFuncToCubin to be generic.
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 265806

mlir/CMakeLists.txt

mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h

mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h

mlir/include/mlir/InitAllPasses.h

mlir/lib/Conversion/CMakeLists.txt

mlir/lib/Conversion/GPUCommon/CMakeLists.txt

mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp

mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt

mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp

mlir/test/Conversion/GPUToROCm/lit.local.cfg

mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir

mlir/test/lib/Transforms/CMakeLists.txt

mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp

mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp

mlir/test/lit.site.cfg.py.in

mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp

mlir/tools/mlir-opt/mlir-opt.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu][mlir-cuda-runner] Refactor ConvertKernelFuncToCubin to be generic. (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 265806

mlir/CMakeLists.txt

mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h

mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h

mlir/include/mlir/InitAllPasses.h

mlir/lib/Conversion/CMakeLists.txt

mlir/lib/Conversion/GPUCommon/CMakeLists.txt

mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp

mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt

mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp

mlir/test/Conversion/GPUToROCm/lit.local.cfg

mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir

mlir/test/lib/Transforms/CMakeLists.txt

mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp

mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp

mlir/test/lit.site.cfg.py.in

mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp

mlir/tools/mlir-opt/mlir-opt.cpp

[mlir][gpu][mlir-cuda-runner] Refactor ConvertKernelFuncToCubin to be generic.
Closed, Public