This is an archive of the discontinued LLVM Phabricator instance.

Show First 20 Lines • Show All 111 Lines • ▼ Show 20 Lines	Option<std::string> features{*this, "features",
::llvm::cl::desc("Target features")};		::llvm::cl::desc("Target features")};
Option<int> optLevel{*this, "opt-level",		Option<int> optLevel{*this, "opt-level",
llvm::cl::desc("Optimization level for compilation"),		llvm::cl::desc("Optimization level for compilation"),
llvm::cl::init(2)};		llvm::cl::init(2)};
Option<std::string> gpuBinaryAnnotation{		Option<std::string> gpuBinaryAnnotation{
*this, "gpu-binary-annotation",		*this, "gpu-binary-annotation",
llvm::cl::desc("Annotation attribute string for GPU binary"),		llvm::cl::desc("Annotation attribute string for GPU binary"),
llvm::cl::init(getDefaultGpuBinaryAnnotation())};		llvm::cl::init(getDefaultGpuBinaryAnnotation())};
		Option<bool> dumpPtx{*this, "dump-ptx",
		::llvm::cl::desc("Dump generated PTX"),
		llvm::cl::init(false)};
};		};
} // namespace gpu		} // namespace gpu

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Registration		// Registration
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// Register pass to serialize GPU kernel functions to a CUBIN binary		/// Register pass to serialize GPU kernel functions to a CUBIN binary
/// annotation.		/// annotation.
void registerGpuSerializeToCubinPass();		void registerGpuSerializeToCubinPass();

/// Register pass to serialize GPU kernel functions to a HSAco binary		/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.		/// annotation.
void registerGpuSerializeToHsacoPass();		void registerGpuSerializeToHsacoPass();

/// Create an instance of the GPU kernel function to CUBIN binary serialization		/// Create an instance of the GPU kernel function to CUBIN binary serialization
/// pass with optLevel (default level 2).		/// pass with optLevel (default level 2).
std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,		std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
StringRef chip,		StringRef chip,
StringRef features,		StringRef features,
int optLevel = 2);		int optLevel = 2,
		bool dumpPtx = false);

/// Create an instance of the GPU kernel function to HSAco binary serialization		/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.		/// pass.
std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,		std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
StringRef arch,		StringRef arch,
StringRef features,		StringRef features,
int optLevel);		int optLevel);

/// Generate the code for registering passes.		/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION		#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"		#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"

} // namespace mlir		} // namespace mlir

#endif // MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_		#endif // MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_

mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp

//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//		//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file implements a pass that serializes a gpu module into CUBIN blob and		// This file implements a pass that serializes a gpu module into CUBIN blob and
// adds that blob as a string attribute of the module.		// adds that blob as a string attribute of the module.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/Transforms/Passes.h"		#include "mlir/Dialect/GPU/Transforms/Passes.h"
		#include "llvm/Support/Debug.h"

#if MLIR_GPU_TO_CUBIN_PASS_ENABLE		#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"		#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"		#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/Support/TargetSelect.h"		#include "llvm/Support/TargetSelect.h"

#include <cuda.h>		#include <cuda.h>
Show All 22 Lines
namespace {		namespace {
class SerializeToCubinPass		class SerializeToCubinPass
: public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {		: public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
public:		public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)		MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)

SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",		SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
StringRef chip = "sm_35", StringRef features = "+ptx60",		StringRef chip = "sm_35", StringRef features = "+ptx60",
int optLevel = 2);		int optLevel = 2, bool dumpPtx = false);

StringRef getArgument() const override { return "gpu-to-cubin"; }		StringRef getArgument() const override { return "gpu-to-cubin"; }
StringRef getDescription() const override {		StringRef getDescription() const override {
return "Lower GPU kernel function to CUBIN binary annotations";		return "Lower GPU kernel function to CUBIN binary annotations";
}		}

private:		private:
void getDependentDialects(DialectRegistry &registry) const override;		void getDependentDialects(DialectRegistry &registry) const override;

// Serializes PTX to CUBIN.		// Serializes PTX to CUBIN.
std::unique_ptr<std::vector<char>>		std::unique_ptr<std::vector<char>>
serializeISA(const std::string &isa) override;		serializeISA(const std::string &isa) override;
};		};
} // namespace		} // namespace

// Sets the 'option' to 'value' unless it already has a value.		// Sets the 'option' to 'value' unless it already has a value.
static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {		static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
if (!option.hasValue())		if (!option.hasValue())
option = value.str();		option = value.str();
}		}

SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,		SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
StringRef features, int optLevel) {		StringRef features, int optLevel,
		bool dumpPtx) {
maybeSetOption(this->triple, triple);		maybeSetOption(this->triple, triple);
maybeSetOption(this->chip, chip);		maybeSetOption(this->chip, chip);
maybeSetOption(this->features, features);		maybeSetOption(this->features, features);
		this->dumpPtx = dumpPtx;
if (this->optLevel.getNumOccurrences() == 0)		if (this->optLevel.getNumOccurrences() == 0)
this->optLevel.setValue(optLevel);		this->optLevel.setValue(optLevel);
}		}

void SerializeToCubinPass::getDependentDialects(		void SerializeToCubinPass::getDependentDialects(
DialectRegistry &registry) const {		DialectRegistry &registry) const {
registerNVVMDialectTranslation(registry);		registerNVVMDialectTranslation(registry);
gpu::SerializeToBlobPass::getDependentDialects(registry);		gpu::SerializeToBlobPass::getDependentDialects(registry);
Show All 19 Lines	void *jitOptionsVals[] = {jitErrorBuffer,
reinterpret_cast<void *>(sizeof(jitErrorBuffer))};		reinterpret_cast<void *>(sizeof(jitErrorBuffer))};

RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */		RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
jitOptions, /* jit options */		jitOptions, /* jit options */
jitOptionsVals, /* jit option values */		jitOptionsVals, /* jit option values */
&linkState));		&linkState));

auto kernelName = getOperation().getName().str();		auto kernelName = getOperation().getName().str();
		if (dumpPtx) {
		llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
		llvm::dbgs() << isa << "\n";
		}
RETURN_ON_CUDA_ERROR(cuLinkAddData(		RETURN_ON_CUDA_ERROR(cuLinkAddData(
linkState, CUjitInputType::CU_JIT_INPUT_PTX,		linkState, CUjitInputType::CU_JIT_INPUT_PTX,
const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),		const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
kernelName.c_str(), 0, /* number of jit options */		kernelName.c_str(), 0, /* number of jit options */
nullptr, /* jit options */		nullptr, /* jit options */
nullptr /* jit option values */		nullptr /* jit option values */
));		));

Show All 23 Lines	PassRegistration<SerializeToCubinPass> registerSerializeToCubin([] {

return std::make_unique<SerializeToCubinPass>();		return std::make_unique<SerializeToCubinPass>();
});		});
}		}

std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,		std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
StringRef arch,		StringRef arch,
StringRef features,		StringRef features,
int optLevel) {		int optLevel,
		bool dumpPtx) {
return std::make_unique<SerializeToCubinPass>(triple, arch, features,		return std::make_unique<SerializeToCubinPass>(triple, arch, features,
optLevel);		optLevel, dumpPtx);
}		}

#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE		#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
void mlir::registerGpuSerializeToCubinPass() {}		void mlir::registerGpuSerializeToCubinPass() {}
#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE		#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE

mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir

This file was added.

				// RUN: mlir-opt %s \
				// RUN: | mlir-opt -gpu-kernel-outlining \
				// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{dump-ptx}))' \
				// RUN: 2>&1 | FileCheck %s

				// CHECK: Generated by LLVM NVPTX Back-End
				// CHECK: .visible .func kernel_a()
				// CHECK: ret;

				gpu.module @bar {
				llvm.func @kernel_a()
				attributes { gpu.kernel } {
				llvm.return
				}
				}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu] Add dump-ptx option (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 540154

mlir/include/mlir/Dialect/GPU/Transforms/Passes.h

mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp

mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir

[mlir][gpu] Add dump-ptx option
Closed, Public