Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -120,12 +120,8 @@ void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); std::string getDeviceSideName(const NamedDecl *ND) override; -public: - CGNVCUDARuntime(CodeGenModule &CGM); - - void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - bool Extern, bool Constant) override { + bool Extern, bool Constant) { DeviceVars.push_back({&Var, VD, {DeviceVarFlags::Variable, Extern, Constant, @@ -133,7 +129,7 @@ /*Normalized*/ false, 0}}); } void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, - bool Extern, int Type) override { + bool Extern, int Type) { DeviceVars.push_back({&Var, VD, {DeviceVarFlags::Surface, Extern, /*Constant*/ false, @@ -141,17 +137,27 @@ /*Normalized*/ false, Type}}); } void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, - bool Extern, int Type, bool Normalized) override { + bool Extern, int Type, bool Normalized) { DeviceVars.push_back({&Var, VD, {DeviceVarFlags::Texture, Extern, /*Constant*/ false, /*Managed*/ false, Normalized, Type}}); } +public: + CGNVCUDARuntime(CodeGenModule &CGM); + + void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; + void handleVarRegistration(const VarDecl *VD, + llvm::GlobalVariable &Var) override; + /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function llvm::Function *makeModuleDtorFunction() override; + void + internalizeDeviceSideVars(const VarDecl *D, + llvm::GlobalValue::LinkageTypes &Linkage) override; }; } @@ -915,3 +921,65 @@ CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { return new CGNVCUDARuntime(CGM); } + +void CGNVCUDARuntime::internalizeDeviceSideVars( + const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) { + // Host-side shadows of external declarations of device-side + // global variables become internal definitions. These have to + // be internal in order to prevent name conflicts with global + // host variables with the same name in a different TUs. + // + // __shared__ variables are odd. Shadows do get created, but + // they are not registered with the CUDA runtime, so they + // can't really be used to access their device-side + // counterparts. It's not clear yet whether it's nvcc's bug or + // a feature, but we've got to do the same for compatibility. + if (D->hasAttr() || D->hasAttr() || + D->hasAttr() || + D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()) { + Linkage = llvm::GlobalValue::InternalLinkage; + } +} + +void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D, + llvm::GlobalVariable &GV) { + if (D->hasAttr() || D->hasAttr()) { + // Shadow variables and their properties must be registered with CUDA + // runtime. Skip Extern global variables, which will be registered in + // the TU where they are defined. + // + // Don't register a C++17 inline variable. The local symbol can be + // discarded and referencing a discarded local symbol from outside the + // comdat (__cuda_register_globals) is disallowed by the ELF spec. + // TODO: Reject __device__ constexpr and __device__ inline in Sema. + if (!D->hasExternalStorage() && !D->isInline()) + registerDeviceVar(D, GV, !D->hasDefinition(), + D->hasAttr()); + } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()) { + // Builtin surfaces and textures and their template arguments are + // also registered with CUDA runtime. + const ClassTemplateSpecializationDecl *TD = + cast( + D->getType()->getAs()->getDecl()); + const TemplateArgumentList &Args = TD->getTemplateArgs(); + if (TD->hasAttr()) { + assert(Args.size() == 2 && + "Unexpected number of template arguments of CUDA device " + "builtin surface type."); + auto SurfType = Args[1].getAsIntegral(); + if (!D->hasExternalStorage()) + registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue()); + } else { + assert(Args.size() == 3 && + "Unexpected number of template arguments of CUDA device " + "builtin texture type."); + auto TexType = Args[1].getAsIntegral(); + auto Normalized = Args[2].getAsIntegral(); + if (!D->hasExternalStorage()) + registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(), + Normalized.getZExtValue()); + } + } +} Index: clang/lib/CodeGen/CGCUDARuntime.h =================================================================== --- clang/lib/CodeGen/CGCUDARuntime.h +++ clang/lib/CodeGen/CGCUDARuntime.h @@ -16,6 +16,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H #include "llvm/ADT/StringRef.h" +#include "llvm/IR/GlobalValue.h" namespace llvm { class Function; @@ -80,12 +81,10 @@ /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; - virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - bool Extern, bool Constant) = 0; - virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, - bool Extern, int Type) = 0; - virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, - bool Extern, int Type, bool Normalized) = 0; + + /// Check whether a variable is a device variable and register it if true. + virtual void handleVarRegistration(const VarDecl *VD, + llvm::GlobalVariable &Var) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. @@ -98,6 +97,11 @@ /// Returns function or variable name on device side even if the current /// compilation is for host. virtual std::string getDeviceSideName(const NamedDecl *ND) = 0; + + /// Adjust linkage of shadow variables in host compilation. + virtual void + internalizeDeviceSideVars(const VarDecl *D, + llvm::GlobalValue::LinkageTypes &Linkage) = 0; }; /// Creates an instance of a CUDA runtime class. Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -4267,59 +4267,8 @@ (D->hasAttr() || D->hasAttr())) GV->setExternallyInitialized(true); } else { - // Host-side shadows of external declarations of device-side - // global variables become internal definitions. These have to - // be internal in order to prevent name conflicts with global - // host variables with the same name in a different TUs. - if (D->hasAttr() || D->hasAttr()) { - Linkage = llvm::GlobalValue::InternalLinkage; - // Shadow variables and their properties must be registered with CUDA - // runtime. Skip Extern global variables, which will be registered in - // the TU where they are defined. - // - // Don't register a C++17 inline variable. The local symbol can be - // discarded and referencing a discarded local symbol from outside the - // comdat (__cuda_register_globals) is disallowed by the ELF spec. - // TODO: Reject __device__ constexpr and __device__ inline in Sema. - if (!D->hasExternalStorage() && !D->isInline()) - getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), - D->hasAttr()); - } else if (D->hasAttr()) { - // __shared__ variables are odd. Shadows do get created, but - // they are not registered with the CUDA runtime, so they - // can't really be used to access their device-side - // counterparts. It's not clear yet whether it's nvcc's bug or - // a feature, but we've got to do the same for compatibility. - Linkage = llvm::GlobalValue::InternalLinkage; - } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || - D->getType()->isCUDADeviceBuiltinTextureType()) { - // Builtin surfaces and textures and their template arguments are - // also registered with CUDA runtime. - Linkage = llvm::GlobalValue::InternalLinkage; - const ClassTemplateSpecializationDecl *TD = - cast( - D->getType()->getAs()->getDecl()); - const TemplateArgumentList &Args = TD->getTemplateArgs(); - if (TD->hasAttr()) { - assert(Args.size() == 2 && - "Unexpected number of template arguments of CUDA device " - "builtin surface type."); - auto SurfType = Args[1].getAsIntegral(); - if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(), - SurfType.getSExtValue()); - } else { - assert(Args.size() == 3 && - "Unexpected number of template arguments of CUDA device " - "builtin texture type."); - auto TexType = Args[1].getAsIntegral(); - auto Normalized = Args[2].getAsIntegral(); - if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(), - TexType.getSExtValue(), - Normalized.getZExtValue()); - } - } + getCUDARuntime().internalizeDeviceSideVars(D, Linkage); + getCUDARuntime().handleVarRegistration(D, *GV); } }