Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -8328,6 +8328,9 @@ if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>()) return true; + if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>()) + return true; + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { // Forward declarations aren't required. if (!FD->doesThisDeclarationHaveABody()) Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -813,6 +813,13 @@ if (D->hasAttr<UsedAttr>()) addUsedGlobal(GV); + + // Treat CUDA kernels as if they have __attribute__((used)) applied so we don't + // eliminate them (which would have happened otherwise because the code that + // calls them is on the host side of the compilation and nothing else + // references the kernels). + if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>()) + addUsedGlobal(GV); } void CodeGenModule::setAliasAttributes(const Decl *D, Index: test/CodeGenCUDA/ptx-kernels.cu =================================================================== --- test/CodeGenCUDA/ptx-kernels.cu +++ test/CodeGenCUDA/ptx-kernels.cu @@ -1,7 +1,16 @@ +// Make sure that __global__ functions are emitted along with correct +// annotations and are added to @llvm.used to prevent their elimination. +// REQUIRES: nvptx-registered-target +// // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s #include "Inputs/cuda.h" +// Make sure that all __global__ functions are added to @llvm.used +// CHECK: @llvm.used = appending global +// CHECK-SAME: @global_function +// CHECK-SAME: @_Z16templated_kernelIiEvT_ + // CHECK-LABEL: define void @device_function extern "C" __device__ void device_function() {} @@ -13,4 +22,10 @@ device_function(); } +// Make sure host-instantiated kernels are preserved on device side. 
+template <typename T> __global__ void templated_kernel(T param) {} +// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_ +void host_function() { templated_kernel<<<0,0>>>(0); } + // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} +// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}