Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -951,6 +951,13 @@ let Documentation = [Undocumented]; } +def CUDADeviceShadow : InheritableAttr { + let Spellings = [GNU<"device_shadow">, Declspec<"__device_shadow__">]; + let Subjects = SubjectList<[Var]>; + let LangOpts = [CUDA]; + let Documentation = [DeviceShadowDocs]; +} + def CUDADeviceBuiltin : IgnoredAttr { let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">]; let LangOpts = [CUDA]; Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -4157,3 +4157,17 @@ ``__attribute__((malloc))``. }]; } + +def DeviceShadowDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The GNU style attribute ``__attribute__((device_shadow))`` or MSVC style attribute +``__declspec(__device_shadow__)`` can be added to the definition of a global variable +to indicate it is a HIP device shadow variable. A device shadow variable can +be accessed on both device side and host side. On the device side it has external linkage and is +not initialized. On the host side it has internal linkage and is initialized by +its initializer. + +It is ignored for CUDA and other languages. 
+ }]; +} \ No newline at end of file Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -2414,7 +2414,8 @@ if (!Global->hasAttr() && !Global->hasAttr() && !Global->hasAttr() && - !Global->hasAttr()) + !Global->hasAttr() && + !(LangOpts.HIP && Global->hasAttr())) return; } else { // We need to emit host-side 'shadows' for all global @@ -3769,7 +3770,12 @@ !getLangOpts().CUDAIsDevice && (D->hasAttr() || D->hasAttr() || D->hasAttr()); - if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar)) + // Device-side shadows of initialized host-side global variables are also + // left undefined. + bool IsHIPDeviceShadowVar = getLangOpts().HIP && getLangOpts().CUDAIsDevice && + D->hasAttr(); + if (getLangOpts().CUDA && + (IsCUDASharedVar || IsCUDAShadowVar || IsHIPDeviceShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are @@ -3880,7 +3886,8 @@ // global variables become internal definitions. These have to // be internal in order to prevent name conflicts with global // host variables with the same name in a different TUs. - if (D->hasAttr() || D->hasAttr()) { + if (D->hasAttr() || D->hasAttr() || + (D->hasAttr() && LangOpts.HIP)) { Linkage = llvm::GlobalValue::InternalLinkage; // Shadow variables and their properties must be registered @@ -3904,7 +3911,8 @@ } } - GV->setInitializer(Init); + if (!IsHIPDeviceShadowVar) + GV->setInitializer(Init); if (emitter) emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. 
Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -7848,7 +7848,8 @@ return D->hasAttr() || (isa(D) && D->hasAttr()) || (isa(D) && - (D->hasAttr() || D->hasAttr())); + (D->hasAttr() || D->hasAttr() || + D->hasAttr())); } void AMDGPUTargetCodeGenInfo::setTargetAttributes( Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -6786,6 +6786,10 @@ case ParsedAttr::AT_CUDAHost: handleSimpleAttributeWithExclusions(S, D, AL); break; + case ParsedAttr::AT_CUDADeviceShadow: + handleSimpleAttributeWithExclusions( + S, D, AL); + break; case ParsedAttr::AT_GNUInline: handleGNUInlineAttr(S, D, AL); break; Index: test/AST/ast-dump-cuda-device-shadow.cu =================================================================== --- /dev/null +++ test/AST/ast-dump-cuda-device-shadow.cu @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fcuda-is-device -ast-dump -ast-dump-filter tex %s | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -ast-dump -ast-dump-filter tex %s | FileCheck -strict-whitespace %s +struct textureReference { + int a; +}; + +// CHECK: CUDADeviceShadowAttr +template +struct texture : public textureReference { +texture() { a = 1; } +}; + +__attribute__((device_shadow)) texture tex; Index: test/CodeGenCUDA/device-shadow.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/device-shadow.cu @@ -0,0 +1,28 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CUDADEV %s +// RUN: %clang_cc1 -triple x86_64 -std=c++11 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CUDAHOST %s + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 -fvisibility hidden 
-fapply-global-visibility-to-externs \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=HIPDEV %s +// RUN: %clang_cc1 -triple x86_64 -std=c++11 \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=HIPHOST %s + +struct textureReference { + int a; +}; + +template +struct texture : public textureReference { +texture() { a = 1; } +}; + +__attribute__((device_shadow)) texture tex; +// CUDADEV-NOT: @tex +// CUDAHOST-NOT: call i32 @__hipRegisterVar{{.*}}@tex +// HIPDEV: @tex = external protected{{.*}}global %struct.texture +// HIPDEV-NOT: declare{{.*}}void @_ZN7textureIfLi2ELi1EEC1Ev +// HIPHOST: define{{.*}}@_ZN7textureIfLi2ELi1EEC1Ev +// HIPHOST: call i32 @__hipRegisterVar{{.*}}@tex{{.*}}i32 0, i32 4, i32 0, i32 0) Index: test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- test/Misc/pragma-attribute-supported-attributes-list.test +++ test/Misc/pragma-attribute-supported-attributes-list.test @@ -26,6 +26,7 @@ // CHECK-NEXT: CPUSpecific (SubjectMatchRule_function) // CHECK-NEXT: CUDAConstant (SubjectMatchRule_variable) // CHECK-NEXT: CUDADevice (SubjectMatchRule_function, SubjectMatchRule_variable) +// CHECK-NEXT: CUDADeviceShadow (SubjectMatchRule_variable) // CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function) // CHECK-NEXT: CUDAHost (SubjectMatchRule_function) // CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)