Index: llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
+++ llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
@@ -7,11 +7,16 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass replaces occurrences of __nvvm_reflect("string") and
-// llvm.nvvm.reflect with an integer based on the value of -nvvm-reflect-list
-// string=<int>.
+// This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
+// with an integer.
 //
-// If we see a string not specified in our flags, we replace that call with 0.
+// We choose the value we use by looking, in this order, at:
+//
+// * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
+// * the StringMap passed to the pass's constructor, and
+// * metadata in the module itself.
+//
+// If we see an unknown string, we replace its call with 0.
 //
 //===----------------------------------------------------------------------===//
 
@@ -55,10 +60,9 @@
   static char ID;
   NVVMReflect() : NVVMReflect(StringMap<int>()) {}
 
-  NVVMReflect(const StringMap<int> &Mapping) : FunctionPass(ID) {
+  NVVMReflect(const StringMap<int> &Mapping)
+      : FunctionPass(ID), VarMap(Mapping) {
     initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
-    for (const auto &KV : Mapping)
-      VarMap[KV.getKey()] = KV.getValue();
     setVarMap();
   }
 
@@ -206,6 +210,12 @@
     auto Iter = VarMap.find(ReflectArg);
     if (Iter != VarMap.end())
       ReflectVal = Iter->second;
+    else if (ReflectArg == "__CUDA_FTZ") {
+      // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
+      if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
+              F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
+        ReflectVal = Flag->getSExtValue();
+    }
     Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
     ToRemove.push_back(Call);
   }
Index: llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
+++ llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -S -nvvm-reflect | FileCheck %s
+
+declare i32 @__nvvm_reflect(i8*)
+@str = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
+
+define i32 @foo() {
+  %call = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+  ; CHECK: ret i32 42
+  ret i32 %call
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nvvm-reflect-ftz", i32 42}