This is an archive of the discontinued LLVM Phabricator instance.

Add support for __nvvm_reflect changes in libdevice in CUDA-7.0
ClosedPublic

Authored by tra on Mar 17 2015, 3:04 PM.

Download Raw Diff

Details

Reviewers

eliben
echristo

Commits

rG9e8a03931838: Add support for __nvvm_reflect changes in libdevice in CUDA-7.0
rL232732: Add support for __nvvm_reflect changes in libdevice in CUDA-7.0

Summary

CUDA 7.0's libdevice uses slightly different IR to call nvvm_reflect
and that triggers an assertion in nvvm_reflect optimization pass. This
change allows nvvm_reflect pass to deal with both old and new ways to
pass an argument to nvvm_reflect.

Diff Detail

Repository: rL LLVM

Event Timeline

tra updated this revision to Diff 22135. · Mar 17 2015, 3:04 PM

tra retitled this revision to "Add support for __nvvm_reflect changes in libdevice in CUDA-7.0".

tra updated this object.

tra edited the test plan for this revision. (Show Details)

tra added reviewers: eliben, echristo.

tra added a subscriber: Unknown Object (MLST).

Herald added a subscriber: jholewinski. · View Herald Transcript · Mar 17 2015, 3:04 PM

eliben added inline comments. · Mar 17 2015, 6:44 PM

lib/Target/NVPTX/NVVMReflect.cpp
162 ↗	(On Diff #22135)	Artem, it definitely makes sense to document this change. For lack of a better place, a detailed comment somewhere here would do. Please describe the difference between the two formats and how it's handled here.

Added details on the differences in IR used to call __nvvm_reflect in different CUDA versions.
Updated IR in the test case to work with recent LLVM.

eliben added inline comments. · Mar 18 2015, 4:52 PM

lib/Target/NVPTX/NVVMReflect.cpp
182 ↗	(On Diff #22207)	It may be more idiomatic to use dyn_cast here and then get your Operand*, removing the following cast<>s. It's done a bunch all over LLVM.
test/CodeGen/NVPTX/nvvm-reflect.ll
52 ↗	(On Diff #22207)	Mention that this is the CUDA 7 version

Addressed eliben@'s comments.

LGTM

This revision is now accepted and ready to land. · Mar 18 2015, 5:28 PM

Closed by commit rL232732: Add support for __nvvm_reflect changes in libdevice in CUDA-7.0 (authored by tra). · Explain Why · Mar 19 2015, 10:08 AM

This revision was automatically updated to reflect the committed changes.

Comment looks good (heinous, but good).

LGTM.

-eric

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

NVPTX/

NVVMReflect.cpp

37 lines

test/

CodeGen/

NVPTX/

nvvm-reflect.ll

31 lines

Diff 22270

llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp

Show First 20 Lines • Show All 131 Lines • ▼ Show 20 Lines	bool NVVMReflect::handleFunction(Function *ReflectFunction) {
std::vector<Instruction *> ToRemove;		std::vector<Instruction *> ToRemove;

// Go through the uses of ReflectFunction in this Function.		// Go through the uses of ReflectFunction in this Function.
// Each of them should a CallInst with a ConstantArray argument.		// Each of them should a CallInst with a ConstantArray argument.
// First validate that. If the c-string corresponding to the		// First validate that. If the c-string corresponding to the
// ConstantArray can be found successfully, see if it can be		// ConstantArray can be found successfully, see if it can be
// found in VarMap. If so, replace the uses of CallInst with the		// found in VarMap. If so, replace the uses of CallInst with the
// value found in VarMap. If not, replace the use with value 0.		// value found in VarMap. If not, replace the use with value 0.

		// IR for __nvvm_reflect calls differs between CUDA versions:
		// CUDA 6.5 and earlier uses this sequence:
		// %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
		// (i8 addrspace(4)* getelementptr inbounds
		// ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
		// %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
		//
		// Value returned by Sym->getOperand(0) is a Constant with a
		// ConstantDataSequential operand which can be converted to string and used
		// for lookup.
		//
		// CUDA 7.0 does it slightly differently:
		// %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
		// (i8 addrspace(1)* getelementptr inbounds
		// ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
		//
		// In this case, we get a Constant with a GlobalVariable operand and we need
		// to dig deeper to find its initializer with the string we'll use for lookup.

for (User *U : ReflectFunction->users()) {		for (User *U : ReflectFunction->users()) {
assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");		assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
CallInst *Reflect = cast<CallInst>(U);		CallInst *Reflect = cast<CallInst>(U);

assert((Reflect->getNumOperands() == 2) &&		assert((Reflect->getNumOperands() == 2) &&
"Only one operand expect for _reflect function");		"Only one operand expect for _reflect function");
// In cuda, we will have an extra constant-to-generic conversion of		// In cuda, we will have an extra constant-to-generic conversion of
// the string.		// the string.
const Value *Str = Reflect->getArgOperand(0);		const Value *Str = Reflect->getArgOperand(0);
if (isa<CallInst>(Str)) {		if (isa<CallInst>(Str)) {
// CUDA path		// CUDA path
const CallInst *ConvCall = cast<CallInst>(Str);		const CallInst *ConvCall = cast<CallInst>(Str);
Str = ConvCall->getArgOperand(0);		Str = ConvCall->getArgOperand(0);
}		}
assert(isa<ConstantExpr>(Str) &&		assert(isa<ConstantExpr>(Str) &&
"Format of _reflect function not recognized");		"Format of _reflect function not recognized");
const ConstantExpr *GEP = cast<ConstantExpr>(Str);		const ConstantExpr *GEP = cast<ConstantExpr>(Str);

const Value *Sym = GEP->getOperand(0);		const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");		assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");

const Constant *SymStr = cast<Constant>(Sym);		const Value *Operand = cast<Constant>(Sym)->getOperand(0);
		if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&		// For CUDA-7.0 style __nvvm_reflect calls we need to find operand's
		// initializer.
		assert(GV->hasInitializer() &&
"Format of _reflect function not recognized");		"Format of _reflect function not recognized");
		const Constant *Initializer = GV->getInitializer();
		Operand = Initializer;
		}

assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&		assert(isa<ConstantDataSequential>(Operand) &&
		"Format of _reflect function not recognized");
		assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");		"Format of _reflect function not recognized");

std::string ReflectArg =		std::string ReflectArg =
cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();		cast<ConstantDataSequential>(Operand)->getAsString();

ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);		ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");		DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");

int ReflectVal = 0; // The default value is 0		int ReflectVal = 0; // The default value is 0
if (VarMap.find(ReflectArg) != VarMap.end()) {		if (VarMap.find(ReflectArg) != VarMap.end()) {
ReflectVal = VarMap[ReflectArg];		ReflectVal = VarMap[ReflectArg];
}		}
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect.ll

	Show All 40 Lines
	; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect			; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
	; USE_MUL_0: ret i32 0			; USE_MUL_0: ret i32 0
	; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect			; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
	; USE_MUL_1: ret i32 1			; USE_MUL_1: ret i32 1
	%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))			%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
	%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)			%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
	ret i32 %reflect			ret i32 %reflect
	}			}

				; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
				; Verify that it works, too

				@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"

				define float @bar(float %a, float %b) {
				; USE_MUL_0: define float @bar
				; USE_MUL_0-NOT: call i32 @__nvvm_reflect
				; USE_MUL_1: define float @bar
				; USE_MUL_1-NOT: call i32 @__nvvm_reflect
				%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
				%cmp = icmp ne i32 %reflect, 0
				br i1 %cmp, label %use_mul, label %use_add

				use_mul:
				; USE_MUL_1: fmul float %a, %b
				; USE_MUL_0-NOT: fadd float %a, %b
				%ret1 = fmul float %a, %b
				br label %exit

				use_add:
				; USE_MUL_0: fadd float %a, %b
				; USE_MUL_1-NOT: fmul float %a, %b
				%ret2 = fadd float %a, %b
				br label %exit

				exit:
				%ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
				ret float %ret
				}