Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -3018,6 +3018,9 @@ /// Return a new OMPTraitInfo object owned by this context. OMPTraitInfo &getNewOMPTraitInfo(); + /// Whether a C++ static variable should be externalized. + bool shouldExternalizeStaticVar(const Decl *D) const; + private: /// All OMPTraitInfo objects live in this collection, one per /// `pragma omp [begin] declare variant` directive. Index: clang/include/clang/Basic/LangOptions.h =================================================================== --- clang/include/clang/Basic/LangOptions.h +++ clang/include/clang/Basic/LangOptions.h @@ -293,6 +293,12 @@ /// host code generation. std::string OMPHostIRFile; + /// The user provided compilation unit ID, if non-empty. This is used to + /// externalize static variables which is needed to support accessing static + /// device variables in host code for single source offloading languages + /// like CUDA/HIP. + std::string CUID; + /// Indicates whether the front-end is explicitly told that the /// input is a header file (i.e. -x c-header). bool IsHeaderFile = false; Index: clang/include/clang/Driver/Action.h =================================================================== --- clang/include/clang/Driver/Action.h +++ clang/include/clang/Driver/Action.h @@ -214,14 +214,18 @@ class InputAction : public Action { const llvm::opt::Arg &Input; - + std::string Id; virtual void anchor(); public: - InputAction(const llvm::opt::Arg &Input, types::ID Type); + InputAction(const llvm::opt::Arg &Input, types::ID Type, + StringRef Id = StringRef()); const llvm::opt::Arg &getInputArg() const { return Input; } + void setId(StringRef _Id) { Id = _Id.str(); } + StringRef getId() const { return Id; } + static bool classof(const Action *A) { return A->getKind() == InputClass; } Index: clang/include/clang/Driver/Compilation.h =================================================================== --- clang/include/clang/Driver/Compilation.h +++ clang/include/clang/Driver/Compilation.h @@ -297,6 +297,8 @@ /// Return whether an error during the parsing of the input args. bool containsError() const { return ContainsError; } + void setContainsError() { ContainsError = true; } + /// Redirect - Redirect output of this compilation. Can only be done once. /// /// \param Redirects - array of optional paths. The array should have a size Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -646,6 +646,18 @@ def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; +def cuid_EQ : Joined<["-"], "cuid=">, Flags<[CC1Option]>, + HelpText<"An ID for compilation unit, which should be the same for the same " + "compilation unit but different for different compilation units. " + "It is used to externalize device-side static variables for single " + "source offloading languages CUDA and HIP so that they can be " + "accessed by the host code of the same compilation unit.">; +def fuse_cuid_EQ : Joined<["-"], "fuse-cuid=">, + HelpText<"Method to generate ID's for compilation units for single source " + "offloading languages CUDA and HIP: 'hash' (ID's generated by hashing " + "file path and command line options) | 'random' (ID's generated as " + "random numbers) | 'none' (disabled). Default is 'hash'. This option " + "will be overriden by option '-cuid=[ID]' if it is specified." >; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -10273,12 +10273,20 @@ } else if (D->hasAttr()) { if (L == GVA_DiscardableODR) return GVA_StrongODR; - } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice && - D->hasAttr()) { + } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) { // Device-side functions with __global__ attribute must always be // visible externally so they can be launched from host. - if (L == GVA_DiscardableODR || L == GVA_Internal) + if (D->hasAttr() && + (L == GVA_DiscardableODR || L == GVA_Internal)) return GVA_StrongODR; + // Single source offloading languages like CUDA/HIP need to be able to + // access static device variables from host code of the same compilation + // unit. This is done by externalizing the static variable with a shared + // name between the host and device compilation which is the same for the + // same compilation unit whereas different among different compilation + // units. + if (Context.shouldExternalizeStaticVar(D)) + return GVA_StrongExternal; } return L; } @@ -11164,3 +11172,10 @@ return DB << Section.Decl; return DB << "a prior #pragma section"; } + +bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { + return !getLangOpts().CUID.empty() && + (D->hasAttr() || D->hasAttr()) && + isa(D) && cast(D)->isFileVarDecl() && + cast(D)->getStorageClass() == SC_Static; +} Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -234,6 +234,15 @@ DeviceSideName = std::string(Out.str()); } else DeviceSideName = std::string(ND->getIdentifier()->getName()); + + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND)) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << DeviceSideName; + CGM.printPostfixForExternalizedStaticVar(Out); + DeviceSideName = std::string(Out.str()); + } return DeviceSideName; } Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1410,6 +1410,10 @@ TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); + /// Print the postfix for externalized static variable for single source + /// offloading languages CUDA and HIP. + void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1091,6 +1091,9 @@ } } + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND)) + CGM.printPostfixForExternalizedStaticVar(Out); return std::string(Out.str()); } @@ -6060,3 +6063,8 @@ } return false; } + +void CodeGenModule::printPostfixForExternalizedStaticVar( + llvm::raw_ostream &OS) const { + OS << ".static." << getLangOpts().CUID; +} Index: clang/lib/Driver/Action.cpp =================================================================== --- clang/lib/Driver/Action.cpp +++ clang/lib/Driver/Action.cpp @@ -165,8 +165,8 @@ void InputAction::anchor() {} -InputAction::InputAction(const Arg &_Input, types::ID _Type) - : Action(InputClass, _Type), Input(_Input) {} +InputAction::InputAction(const Arg &_Input, types::ID _Type, StringRef _Id) + : Action(InputClass, _Type), Input(_Input), Id(_Id.str()) {} void BindArchAction::anchor() {} Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Host.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" @@ -2381,6 +2382,14 @@ /// Default GPU architecture if there's no one specified. CudaArch DefaultCudaArch = CudaArch::UNKNOWN; + /// Method to generate compilation unit ID specified by option + /// '-fuse-cuid='. + enum UseCUIDKind { CUID_Hash, CUID_Random, CUID_None, CUID_Invalid }; + UseCUIDKind UseCUID = CUID_Hash; + + /// Compilation unit ID specified by option '-cuid='. + StringRef FixedCUID; + public: CudaActionBuilderBase(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs, @@ -2416,9 +2425,32 @@ // Replicate inputs for each GPU architecture. auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE : types::TY_CUDA_DEVICE; + std::string CUID = FixedCUID.str(); + if (CUID.empty()) { + if (UseCUID == CUID_Random) + CUID = llvm::utohexstr(llvm::sys::Process::GetRandomNumber(), + /*LowerCase=*/true); + else if (UseCUID == CUID_Hash) { + llvm::MD5 Hasher; + llvm::MD5::MD5Result Hash; + SmallString<256> RealPath; + llvm::sys::fs::real_path(IA->getInputArg().getValue(), RealPath, + /*expand_tilde=*/true); + Hasher.update(RealPath); + for (auto *A : Args) { + if (A->getOption().matches(options::OPT_INPUT)) + continue; + Hasher.update(A->getAsString(Args)); + } + Hasher.final(Hash); + CUID = llvm::utohexstr(Hash.low(), /*LowerCase=*/true); + } + } + IA->setId(CUID); + for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { CudaDeviceActions.push_back( - C.MakeAction(IA->getInputArg(), Ty)); + C.MakeAction(IA->getInputArg(), Ty, IA->getId())); } return ABRT_Success; @@ -2534,6 +2566,21 @@ options::OPT_cuda_device_only); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); + FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ); + if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) { + StringRef UseCUIDStr = A->getValue(); + UseCUID = llvm::StringSwitch(UseCUIDStr) + .Case("hash", CUID_Hash) + .Case("random", CUID_Random) + .Case("none", CUID_None) + .Default(CUID_Invalid); + if (UseCUID == CUID_Invalid) { + C.getDriver().Diag(diag::err_drv_invalid_value) + << A->getAsString(Args) << UseCUIDStr; + C.setContainsError(); + return true; + } + } // Collect all cuda_gpu_arch parameters, removing duplicates. std::set GpuArchs; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6021,6 +6021,18 @@ CmdArgs.push_back("-fcuda-short-ptr"); } + if (IsCuda || IsHIP) { + // Determine the original source input. + const Action *SourceAction = &JA; + while (SourceAction->getKind() != Action::InputClass) { + assert(!SourceAction->getInputs().empty() && "unexpected root action!"); + SourceAction = SourceAction->getInputs()[0]; + } + auto CUID = cast(SourceAction)->getId(); + if (!CUID.empty()) + CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID))); + } + if (IsHIP) CmdArgs.push_back("-fcuda-allow-variadic-functions"); Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -2629,6 +2629,7 @@ << Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args); } Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api); + Opts.CUID = std::string(Args.getLastArgValue(OPT_cuid_EQ)); if (Opts.HIP) Opts.GPUMaxThreadsPerBlock = getLastArgIntValue( Args, OPT_gpu_max_threads_per_block_EQ, Opts.GPUMaxThreadsPerBlock); Index: clang/test/CodeGenCUDA/static-device-var.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/static-device-var.cu @@ -0,0 +1,84 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,INT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,INT-HOST %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=123abc \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,EXT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=123abc \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,EXT-HOST %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1x = internal addrspace(1) global i32 0 +// INT-HOST-DAG: @_ZL1x = internal global i32 undef +// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1x.static.123abc = addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @_ZL1x.static.123abc = internal global i32 undef +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.123abc\00" + +static __device__ int x; + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1y = internal addrspace(4) global i32 0 +// INT-HOST-DAG: @_ZL1y = internal global i32 undef +// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1y.static.123abc = addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @_ZL1y.static.123abc = internal global i32 undef +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.123abc\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + b[0] = &w; + devfun(b); +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); + z = 123; +} + +// INT-HOST: __hipRegisterVar({{.*}}@_ZL1x{{.*}}@[[DEVNAMEX]] +// INT-HOST: __hipRegisterVar({{.*}}@_ZL1y{{.*}}@[[DEVNAMEY]] +// EXT-HOST: __hipRegisterVar({{.*}}@_ZL1x.static.123abc{{.*}}@[[DEVNAMEX]] +// EXT-HOST: __hipRegisterVar({{.*}}@_ZL1y.static.123abc{{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p Index: clang/test/Driver/hip-cuid.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-cuid.hip @@ -0,0 +1,130 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// Check invalid -fuse-cuid= option. + +// RUN: not %clang -### -x hip \ +// RUN: -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx900 \ +// RUN: --offload-arch=gfx906 \ +// RUN: -c -nogpulib -fuse-cuid=invalid \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=INVALID %s + +// INVALID: invalid value 'invalid' in '-fuse-cuid=invalid' + +// Check random CUID generator. + +// RUN: %clang -### -x hip \ +// RUN: -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx900 \ +// RUN: --offload-arch=gfx906 \ +// RUN: -c -nogpulib -fuse-cuid=random \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s + +// Check fixed CUID. + +// RUN: %clang -### -x hip \ +// RUN: -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx900 \ +// RUN: --offload-arch=gfx906 \ +// RUN: -c -nogpulib -cuid=abcd \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s + +// Check fixed CUID override -fuse-cuid. + +// RUN: %clang -### -x hip \ +// RUN: -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx900 \ +// RUN: --offload-arch=gfx906 \ +// RUN: -c -nogpulib -fuse-cuid=random -cuid=abcd \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s + +// Check hash CUID generator. + +// RUN: %clang -### -x hip \ +// RUN: -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx900 \ +// RUN: --offload-arch=gfx906 \ +// RUN: -c -nogpulib -fuse-cuid=hash \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s + +// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa" +// COMMON-SAME: "-target-cpu" "gfx900" +// HEX-SAME: "-cuid=[[CUID:[0-9a-f]+]]" +// FIXED-SAME: "-cuid=[[CUID:abcd]]" +// COMMON-SAME: "{{.*}}a.cu" + +// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa" +// COMMON-SAME: "-target-cpu" "gfx906" +// COMMON-SAME: "-cuid=[[CUID]]" +// COMMON-SAME: "{{.*}}a.cu" + +// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// COMMON-SAME: "-cuid=[[CUID]]" +// COMMON-SAME: "{{.*}}a.cu" + +// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa" +// COMMON-SAME: "-target-cpu" "gfx900" +// HEX-NOT: "-cuid=[[CUID]]" +// HEX-SAME: "-cuid=[[CUID2:[0-9a-f]+]]" +// FIXED-SAME: "-cuid=[[CUID2:abcd]]" +// COMMON-SAME: "{{.*}}b.hip" + +// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa" +// COMMON-SAME: "-target-cpu" "gfx906" +// HEX-NOT: "-cuid=[[CUID]]" +// COMMON-SAME: "-cuid=[[CUID2]]" +// COMMON-SAME: "{{.*}}b.hip" + +// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// HEX-NOT: "-cuid=[[CUID]]" +// COMMON-SAME: "-cuid=[[CUID2]]" +// COMMON-SAME: "{{.*}}b.hip" + +// Check CUID generated by hash. +// The same CUID is generated for the same file with the same options. + +// RUN: rm -rf %t.out + +// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1 + +// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu \ +// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1 + +// RUN: FileCheck %s -check-prefixes=HASH -input-file %t.out + +// HASH: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID:[0-9a-f]+]]" +// HASH: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID]]" + + +// Check CUID generated by hash. +// Different CUID's are generated for the same file with different options. + +// RUN: rm -rf %t.out + +// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu -DX=1 \ +// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1 + +// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu -DX=2 \ +// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \ +// RUN: %S/Inputs/../Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1 + +// RUN: FileCheck %s -check-prefixes=HASH2 -input-file %t.out + +// HASH2: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID:[0-9a-f]+]]" +// HASH2-NOT: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID]]" Index: clang/test/SemaCUDA/static-device-var.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/static-device-var.cu @@ -0,0 +1,37 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify + +#include "Inputs/cuda.h" + +__device__ void f1() { + const static int b = 123; + static int a; + // expected-error@-1 {{within a __device__ function, only __shared__ variables or const variables without device memory qualifier may be marked 'static'}} +} + +__global__ void k1() { + const static int b = 123; + static int a; + // expected-error@-1 {{within a __global__ function, only __shared__ variables or const variables without device memory qualifier may be marked 'static'}} +} + +static __device__ int x; +static __constant__ int y; + +__global__ void kernel(int *a) { + a[0] = x; + a[1] = y; +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); +}