This is an archive of the discontinued LLVM Phabricator instance.

[CUDA] Place GPU binary into .nv_fatbin section and align it by 8.
ClosedPublic

Authored by tra on Aug 11 2016, 3:29 PM.

Download Raw Diff

Details

Reviewers

Commits

rG4c09318be2dd: [CUDA] Place GPU binary into .nv_fatbin section and align it by 8.
rC278549: [CUDA] Place GPU binary into .nv_fatbin section and align it by 8.
rL278549: [CUDA] Place GPU binary into .nv_fatbin section and align it by 8.

Summary

This matches the way nvcc encapsulates GPU binaries into the host object file.
Now cuobjdump can deal with clang-compiled object files.

Diff Detail

Repository: rL LLVM

Event Timeline

tra updated this revision to Diff 67754. Aug 11 2016, 3:29 PM

tra retitled this revision to "[CUDA] Place GPU binary into .nv_fatbin section and align it by 8."

tra updated this object.

tra added a reviewer: jlebar.

tra added a subscriber: cfe-commits.

jlebar added inline comments. Aug 11 2016, 3:56 PM

lib/CodeGen/CGCUDANV.cpp
62 ↗	(On Diff #67754)	StringRefs aren't necessarily null-terminated. :)

Reverted argument type to std::string

tra marked an inline comment as done. Aug 12 2016, 10:16 AM

tra added inline comments.

lib/CodeGen/CGCUDANV.cpp
62–69 ↗	(On Diff #67851)	Good point. I've reverted argument types to std::string.

jlebar accepted this revision. Aug 12 2016, 10:22 AM

jlebar edited edge metadata.

This revision is now accepted and ready to land. Aug 12 2016, 10:22 AM

Closed by commit rL278549: [CUDA] Place GPU binary into .nv_fatbin section and align it by 8. (authored by tra). · Explain Why · Aug 12 2016, 11:51 AM

This revision was automatically updated to reflect the committed changes.

tra marked an inline comment as done.

Revision Contents

Path

Size

cfe/

trunk/

lib/

CodeGen/

CGCUDANV.cpp

11 lines

test/

CodeGenCUDA/

device-stub.cu

6 lines

Diff 67878

cfe/trunk/lib/CodeGen/CGCUDANV.cpp

Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	private:
/// Creates a function to register all kernel stubs generated in this module.		/// Creates a function to register all kernel stubs generated in this module.
llvm::Function *makeRegisterGlobalsFn();		llvm::Function *makeRegisterGlobalsFn();

/// Helper function that generates a constant string and returns a pointer to		/// Helper function that generates a constant string and returns a pointer to
/// the start of the string. The result of this function can be used anywhere		/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.		/// where the C code specifies const char*.
llvm::Constant *makeConstantString(const std::string &Str,		llvm::Constant *makeConstantString(const std::string &Str,
const std::string &Name = "",		const std::string &Name = "",
		const std::string &SectionName = "",
unsigned Alignment = 0) {		unsigned Alignment = 0) {
llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),		llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
llvm::ConstantInt::get(SizeTy, 0)};		llvm::ConstantInt::get(SizeTy, 0)};
auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());		auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
		llvm::GlobalVariable *GV =
		cast<llvm::GlobalVariable>(ConstStr.getPointer());
		if (!SectionName.empty())
		GV->setSection(SectionName);
		if (Alignment)
		GV->setAlignment(Alignment);

return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),		return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
ConstStr.getPointer(), Zeros);		ConstStr.getPointer(), Zeros);
}		}

void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);		void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);

public:		public:
CGNVCUDARuntime(CodeGenModule &CGM);		CGNVCUDARuntime(CodeGenModule &CGM);
▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines	if (std::error_code EC = GpuBinaryOrErr.getError()) {
<< EC.message();		<< EC.message();
continue;		continue;
}		}

// Create initialized wrapper structure that points to the loaded GPU binary		// Create initialized wrapper structure that points to the loaded GPU binary
llvm::Constant *Values[] = {		llvm::Constant *Values[] = {
llvm::ConstantInt::get(IntTy, 0x466243b1), // Fatbin wrapper magic.		llvm::ConstantInt::get(IntTy, 0x466243b1), // Fatbin wrapper magic.
llvm::ConstantInt::get(IntTy, 1), // Fatbin version.		llvm::ConstantInt::get(IntTy, 1), // Fatbin version.
makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "", 16), // Data.		makeConstantString(GpuBinaryOrErr.get()->getBuffer(), // Data.
		"", ".nv_fatbin", 8), //
llvm::ConstantPointerNull::get(VoidPtrTy)}; // Unused in fatbin v1.		llvm::ConstantPointerNull::get(VoidPtrTy)}; // Unused in fatbin v1.
llvm::GlobalVariable *FatbinWrapper = new llvm::GlobalVariable(		llvm::GlobalVariable *FatbinWrapper = new llvm::GlobalVariable(
TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage,		TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage,
llvm::ConstantStruct::get(FatbinWrapperTy, Values),		llvm::ConstantStruct::get(FatbinWrapperTy, Values),
"__cuda_fatbin_wrapper");		"__cuda_fatbin_wrapper");
// NVIDIA's cuobjdump looks for fatbins in this section.		// NVIDIA's cuobjdump looks for fatbins in this section.
FatbinWrapper->setSection(".nvFatBinSegment");		FatbinWrapper->setSection(".nvFatBinSegment");

▲ Show 20 Lines • Show All 62 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGenCUDA/device-stub.cu

Show All 39 Lines	void use_pointers() {
p = &ext_constant_var;		p = &ext_constant_var;
p = &ext_host_var;		p = &ext_host_var;
}		}

// Make sure that all parts of GPU code init/cleanup are there:		// Make sure that all parts of GPU code init/cleanup are there:
// * constant unnamed string with the kernel name		// * constant unnamed string with the kernel name
// CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"		// CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
// * constant unnamed string with GPU binary		// * constant unnamed string with GPU binary
// CHECK: private unnamed_addr constant{{.*}}\00"		// CHECK: private unnamed_addr constant{{.*GPU binary would be here.*}}\00"
		// CHECK-SAME: section ".nv_fatbin", align 8
// * constant struct that wraps GPU binary		// * constant struct that wraps GPU binary
// CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }		// CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }
// CHECK: { i32 1180844977, i32 1, {{.*}}, i8* null }		// CHECK-SAME: { i32 1180844977, i32 1, {{.*}}, i8* null }
		// CHECK-SAME: section ".nvFatBinSegment"
// * variable to save GPU binary handle after initialization		// * variable to save GPU binary handle after initialization
// CHECK: @__cuda_gpubin_handle = internal global i8** null		// CHECK: @__cuda_gpubin_handle = internal global i8** null
// * Make sure our constructor/destructor was added to global ctor/dtor list.		// * Make sure our constructor/destructor was added to global ctor/dtor list.
// CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor		// CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
// CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor		// CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor

// Test that we build the correct number of calls to cudaSetupArgument followed		// Test that we build the correct number of calls to cudaSetupArgument followed
// by a call to cudaLaunch.		// by a call to cudaLaunch.
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines