Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2188,15 +2188,7 @@ } else { const auto *VD = cast(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - // We need to emit device-side global CUDA variables even if a - // variable does not have a definition -- we still need to define - // host-side shadow for it. - bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice && - !VD->hasDefinition() && - (VD->hasAttr() || - VD->hasAttr()); - if (!MustEmitForCuda && - VD->isThisDeclarationADefinition() != VarDecl::Definition && + if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. @@ -3616,7 +3608,10 @@ Flags |= CGCUDARuntime::ExternDeviceVar; if (D->hasAttr()) Flags |= CGCUDARuntime::ConstantDeviceVar; - getCUDARuntime().registerDeviceVar(*GV, Flags); + // Extern global variables will be registered in the TU where they are + // defined. + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceVar(*GV, Flags); } else if (D->hasAttr()) // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they Index: clang/test/CodeGenCUDA/device-stub.cu =================================================================== --- clang/test/CodeGenCUDA/device-stub.cu +++ clang/test/CodeGenCUDA/device-stub.cu @@ -42,13 +42,20 @@ // ALL-DAG: @ext_host_var = external global i32 extern int ext_host_var; -// Shadows for external device-side variables are *definitions* of -// those variables. -// ALL-DAG: @ext_device_var = internal global i32 +// external device-side variables -> extern references to their shadows. +// ALL-DAG: @ext_device_var = external global i32 extern __device__ int ext_device_var; -// ALL-DAG: @ext_device_var = internal global i32 +// ALL-DAG: @ext_device_var = external global i32 extern __constant__ int ext_constant_var; +// external device-side variables with definitions should generate +// definitions for the shadows. +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +extern __device__ int ext_device_var_def; +__device__ int ext_device_var_def = 1; +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +__constant__ int ext_constant_var_def = 2; + void use_pointers() { int *p; p = &device_var; @@ -114,8 +121,8 @@ // ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0 // ALL: ret void // Test that we've built a constructor.