This is an archive of the discontinued LLVM Phabricator instance.

Add support for __nvvm_reflect changes in libdevice in CUDA-7.0
ClosedPublic

Authored by tra on Mar 17 2015, 3:04 PM.

Download Raw Diff

Details

Reviewers

eliben
echristo

Commits

rG9e8a03931838: Add support for __nvvm_reflect changes in libdevice in CUDA-7.0
rL232732: Add support for __nvvm_reflect changes in libdevice in CUDA-7.0

Summary

CUDA 7.0's libdevice uses slightly different IR to call nvvm_reflect,
and that triggers an assertion in the nvvm_reflect optimization pass. This
change allows the nvvm_reflect pass to deal with both the old and the new
ways of passing an argument to nvvm_reflect.

Diff Detail

Event Timeline

tra updated this revision to Diff 22135. · Mar 17 2015, 3:04 PM

tra retitled this revision to "Add support for __nvvm_reflect changes in libdevice in CUDA-7.0".

tra updated this object.

tra edited the test plan for this revision. (Show Details)

tra added reviewers: eliben, echristo.

tra added a subscriber: Unknown Object (MLST).

Herald added a subscriber: jholewinski. · View Herald Transcript · Mar 17 2015, 3:04 PM

eliben added inline comments. · Mar 17 2015, 6:44 PM

lib/Target/NVPTX/NVVMReflect.cpp
162	Artem, it definitely makes sense to document this change. For lack of a better place, a detailed comment somewhere here would do. Please describe the difference between the two formats and how it's handled here.

Added details on the differences in IR used to call __nvvm_reflect in different CUDA versions.
Updated IR in the test case to work with recent LLVM.

eliben added inline comments. · Mar 18 2015, 4:52 PM

lib/Target/NVPTX/NVVMReflect.cpp
162	It may be more idiomatic to use dyn_cast here and then get your Operand*, removing the following cast<>s. It's done a bunch all over LLVM.
test/CodeGen/NVPTX/nvvm-reflect.ll
52	Mention that this is the CUDA 7 version

Addressed eliben@'s comments.

LGTM

This revision is now accepted and ready to land. · Mar 18 2015, 5:28 PM

Closed by commit rL232732: Add support for __nvvm_reflect changes in libdevice in CUDA-7.0 (authored by tra). · Explain Why · Mar 19 2015, 10:08 AM

This revision was automatically updated to reflect the committed changes.

Comment looks good (heinous, but good).

LGTM.

-eric

Revision Contents

Path

Size

lib/

Target/

NVPTX/

NVVMReflect.cpp

16 lines

test/

CodeGen/

NVPTX/

nvvm-reflect.ll

28 lines

Diff 22135

lib/Target/NVPTX/NVVMReflect.cpp

Show First 20 Lines • Show All 152 Lines • ▼ Show 20 Lines	for (User *U : ReflectFunction->users()) {
}		}
assert(isa<ConstantExpr>(Str) &&		assert(isa<ConstantExpr>(Str) &&
"Format of _reflect function not recognized");		"Format of _reflect function not recognized");
const ConstantExpr *GEP = cast<ConstantExpr>(Str);		const ConstantExpr *GEP = cast<ConstantExpr>(Str);

const Value *Sym = GEP->getOperand(0);		const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");		assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");

const Constant *SymStr = cast<Constant>(Sym);		const Value *Operand = cast<Constant>(Sym)->getOperand(0);
		if (isa<GlobalVariable>(Operand)) {
		eliben (Unsubmitted, Not Done) — inline comment: Artem, it definitely makes sense to document this change. For lack of a better place, a detailed comment somewhere here would do. Please describe the difference between the two formats and how it's handled here.
		eliben (Unsubmitted, Not Done) — inline comment: It may be more idiomatic to use dyn_cast here and then get your Operand*, removing the following cast<>s. It's done a bunch all over LLVM.
assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&		assert(cast<GlobalVariable>(Operand)->hasInitializer() &&
"Format of _reflect function not recognized");		"Format of _reflect function not recognized");
		const Constant *Initializer =
		cast<GlobalVariable>(Operand)->getInitializer();
		Operand = Initializer;
		}

assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&		assert(isa<ConstantDataSequential>(Operand) &&
		"Format of _reflect function not recognized");
		assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");		"Format of _reflect function not recognized");

std::string ReflectArg =		std::string ReflectArg =
cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();		cast<ConstantDataSequential>(Operand)->getAsString();

ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);		ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");		DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");

int ReflectVal = 0; // The default value is 0		int ReflectVal = 0; // The default value is 0
if (VarMap.find(ReflectArg) != VarMap.end()) {		if (VarMap.find(ReflectArg) != VarMap.end()) {
ReflectVal = VarMap[ReflectArg];		ReflectVal = VarMap[ReflectArg];
}		}
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

test/CodeGen/NVPTX/nvvm-reflect.ll

	Show All 40 Lines
	; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect			; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
	; USE_MUL_0: ret i32 0			; USE_MUL_0: ret i32 0
	; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect			; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
	; USE_MUL_1: ret i32 1			; USE_MUL_1: ret i32 1
	%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))			%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
	%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)			%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
	ret i32 %reflect			ret i32 %reflect
	}			}

				@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"

				define float @bar(float %a, float %b) {
				eliben (Unsubmitted, Not Done) — inline comment: Mention that this is the CUDA 7 version.
				; USE_MUL_0: define float @bar
				; USE_MUL_0-NOT: call i32 @__nvvm_reflect
				; USE_MUL_1: define float @bar
				; USE_MUL_1-NOT: call i32 @__nvvm_reflect
				%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
				%cmp = icmp ne i32 %reflect, 0
				br i1 %cmp, label %use_mul, label %use_add

				use_mul:
				; USE_MUL_1: fmul float %a, %b
				; USE_MUL_0-NOT: fadd float %a, %b
				%ret1 = fmul float %a, %b
				br label %exit

				use_add:
				; USE_MUL_0: fadd float %a, %b
				; USE_MUL_1-NOT: fmul float %a, %b
				%ret2 = fadd float %a, %b
				br label %exit

				exit:
				%ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
				ret float %ret
				}