Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8146,7 +8146,7 @@ "call to global function %0 not configured">; def err_ref_bad_target : Error< "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " - "function %1 in %select{__device__|__global__|__host__|__host__ __device__}2 function">; + "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">; def err_ref_bad_target_global_initializer : Error< "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " "function %1 in global initializer">; Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -1522,6 +1522,29 @@ if (result.HasSideEffects) return ConstantEmission(); + // In CUDA/HIP device compilation, a lambda may capture a reference variable + // referencing a global host variable by copy. In this case the lambda should + // make a copy of the value of the global host variable. The DRE of the + // captured reference variable cannot be emitted as load from the host + // global variable as compile time constant, since the host variable is not + // accessible on device. The DRE of the captured reference variable has to be + // loaded from captures. + if (CGM.getLangOpts().CUDAIsDevice && + refExpr->refersToEnclosingVariableOrCapture()) { + auto *MD = dyn_cast_or_null(CurCodeDecl); + if (MD && MD->getParent()->isLambda() && + MD->getOverloadedOperator() == OO_Call) { + const APValue::LValueBase &base = result.Val.getLValueBase(); + if (const ValueDecl *D = base.dyn_cast()) { + if (const VarDecl *VD = dyn_cast(D)) { + if (!VD->hasAttr()) { + return ConstantEmission(); + } + } + } + } + } + // Emit as a constant. auto C = ConstantEmitter(*this).emitAbstract(refExpr->getLocation(), result.Val, resultType); Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -743,7 +743,8 @@ return true; SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) - << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); + << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee + << IdentifyCUDATarget(Caller); if (!Callee->getBuiltinID()) SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, Caller, *this) Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -354,6 +354,21 @@ diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); + if (LangOpts.CUDAIsDevice) { + auto *FD = dyn_cast_or_null(CurContext); + auto Target = IdentifyCUDATarget(FD); + if (FD && Target != CFT_Host) { + const auto *VD = dyn_cast(D); + if (VD && VD->hasGlobalStorage() && !VD->hasAttr() && + !VD->hasAttr() && !VD->hasAttr() && + !VD->getType()->isCUDADeviceBuiltinSurfaceType() && + !VD->getType()->isCUDADeviceBuiltinTextureType() && + !VD->isConstexpr() && !VD->getType().isConstQualified()) + targetDiag(*Locs.begin(), diag::err_ref_bad_target) + << /*host*/ 2 << /*variable*/ 1 << VD << Target; + } + } + if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) { if (const auto *VD = dyn_cast(D)) checkDeviceDecl(VD, Loc); @@ -1939,13 +1954,47 @@ if (isUnevaluatedContext()) return NOUR_Unevaluated; + // CUDA/HIP: Check whether a captured reference variable is referencing a + // host variable in a device or host device lambda. + auto IsCapturingReferenceToHostVarInCUDADeviceLambda = [&](VarDecl *VD) { + if (!getLangOpts().CUDA || !VD->hasInit()) + return false; + assert(VD->getType()->isReferenceType()); + + // Check whether the reference variable is referencing a host variable. + auto *DRE = dyn_cast(VD->getInit()); + if (!DRE) + return false; + auto *Referee = dyn_cast(DRE->getDecl()); + if (!Referee || !Referee->hasGlobalStorage() || + Referee->hasAttr()) + return false; + + // Check whether the current function is a device or host device lambda. + // Check whether the reference variable is a capture by getDeclContext() + // since refersToEnclosingVariableOrCapture() is not ready at this point. + auto *MD = dyn_cast_or_null(CurContext); + if (MD && MD->getParent()->isLambda() && + MD->getOverloadedOperator() == OO_Call && + MD->hasAttr() && VD->getDeclContext() != MD) + return true; + + return false; + }; // C++2a [basic.def.odr]p4: // A variable x whose name appears as a potentially-evaluated expression e // is odr-used by e unless [...] x is a reference that is usable in // constant expressions. + // CUDA/HIP: + // If a reference variable referencing a host variable is captured in a + // device or host device lambda, the value of the referee must be copied + // to the capture and the reference variable must be treated as odr-use + // since the value of the referee is not known at compile time and must + // be loaded from the captured. if (VarDecl *VD = dyn_cast(D)) { if (VD->getType()->isReferenceType() && !(getLangOpts().OpenMP && isOpenMPCapturedDecl(D)) && + !IsCapturingReferenceToHostVarInCUDADeviceLambda(VD) && VD->isUsableInConstantExpressions(Context)) return NOUR_Constant; } Index: clang/test/CodeGenCUDA/function-overload.cu =================================================================== --- clang/test/CodeGenCUDA/function-overload.cu +++ clang/test/CodeGenCUDA/function-overload.cu @@ -12,6 +12,9 @@ #include "Inputs/cuda.h" // Check constructors/destructors for D/H functions +#ifdef __CUDA_ARCH__ +__device__ +#endif int x; struct s_cd_dh { __host__ s_cd_dh() { x = 11; } Index: clang/test/CodeGenCUDA/lambda-reference-var.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/lambda-reference-var.cu @@ -0,0 +1,126 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu \ +// RUN: | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// HOST: %[[T1:.*]] = type <{ i32*, i32, [4 x i8] }> +// HOST: %[[T2:.*]] = type { i32*, i32** } +// HOST: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }> +// DEV: %[[T1:.*]] = type { i32* } +// DEV: %[[T2:.*]] = type { i32** } +// DEV: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }> +int global_host_var; +__device__ int global_device_var; + +template +__global__ void kern(F f) { f(); } + +// DEV-LABEL: @_ZZ27dev_capture_dev_ref_by_copyPiENKUlvE_clEv( +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL]] +__device__ void dev_capture_dev_ref_by_copy(int *out) { + int &ref = global_device_var; + [=](){ *out = ref;}(); +} + +// DEV-LABEL: @_ZZ26dev_capture_dev_ref_by_refPiENKUlvE_clEv( +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 +// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL]] +__device__ void dev_capture_dev_ref_by_ref(int *out) { + int &ref = global_device_var; + [&](){ ref++; *out = ref;}(); +} + +// DEV-LABEL: define void @_Z7dev_refPi( +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 +// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL]] +__device__ void dev_ref(int *out) { + int &ref = global_device_var; + ref++; + *out = ref; +} + +// DEV-LABEL: @_ZZ14dev_lambda_refPiENKUlvE_clEv( +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 +// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*) +// DEV: store i32 %[[VAL]] +__device__ void dev_lambda_ref(int *out) { + [=](){ + int &ref = global_device_var; + ref++; + *out = ref; + }(); +} + +// HOST-LABEL: @_ZZ29host_capture_host_ref_by_copyPiENKUlvE_clEv( +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL]] +void host_capture_host_ref_by_copy(int *out) { + int &ref = global_host_var; + [=](){ *out = ref;}(); +} + +// HOST-LABEL: @_ZZ28host_capture_host_ref_by_refPiENKUlvE_clEv( +// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], %[[T2]]* %this1, i32 0, i32 0 +// HOST: %[[REF:.*]] = load i32*, i32** %[[CAP]] +// HOST: %[[VAL:.*]] = load i32, i32* %[[REF]] +// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 +// HOST: store i32 %[[VAL2]], i32* %[[REF]] +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL]] +void host_capture_host_ref_by_ref(int *out) { + int &ref = global_host_var; + [&](){ ref++; *out = ref;}(); +} + +// HOST-LABEL: define void @_Z8host_refPi( +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 +// HOST: store i32 %[[VAL2]], i32* @global_host_var +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL]] +void host_ref(int *out) { + int &ref = global_host_var; + ref++; + *out = ref; +} + +// HOST-LABEL: @_ZZ15host_lambda_refPiENKUlvE_clEv( +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1 +// HOST: store i32 %[[VAL2]], i32* @global_host_var +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL]] +void host_lambda_ref(int *out) { + [=](){ + int &ref = global_host_var; + ref++; + *out = ref; + }(); +} + +// HOST-LABEL: define void @_Z28dev_capture_host_ref_by_copyPi( +// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %{{.*}}, i32 0, i32 1 +// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var +// HOST: store i32 %[[VAL]], i32* %[[CAP]] +// DEV-LABEL: define internal void @_ZZ28dev_capture_host_ref_by_copyPiENKUlvE_clEv( +// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %this1, i32 0, i32 1 +// DEV: %[[VAL:.*]] = load i32, i32* %[[CAP]] +// DEV: store i32 %[[VAL]] +void dev_capture_host_ref_by_copy(int *out) { + int &ref = global_host_var; + kern<<<1, 1>>>([=]__device__() { *out = ref;}); +} + Index: clang/test/SemaCUDA/device-use-host-var.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/device-use-host-var.cu @@ -0,0 +1,160 @@ +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -fsyntax-only -verify=dev %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=host %s + +// host-no-diagnostics + +#include "Inputs/cuda.h" + +int global_host_var; +__device__ int global_dev_var; +__constant__ int global_constant_var; +__shared__ int global_shared_var; +constexpr int global_constexpr_var = 1; +const int global_const_var = 1; + +template +__global__ void kernel(F f) { f(); } // dev-note2 {{called by 'kernel<(lambda}} + +__device__ void dev_fun(int *out) { + int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} + int &ref_dev_var = global_dev_var; + int &ref_constant_var = global_constant_var; + int &ref_shared_var = global_shared_var; + const int &ref_constexpr_var = global_constexpr_var; + const int &ref_const_var = global_const_var; + + *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} + *out = global_dev_var; + *out = global_constant_var; + *out = global_shared_var; + *out = global_constexpr_var; + *out = global_const_var; + + *out = ref_host_var; + *out = ref_dev_var; + *out = ref_constant_var; + *out = ref_shared_var; + *out = ref_constexpr_var; + *out = ref_const_var; +} + +__global__ void global_fun(int *out) { + int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}} + int &ref_dev_var = global_dev_var; + int &ref_constant_var = global_constant_var; + int &ref_shared_var = global_shared_var; + const int &ref_constexpr_var = global_constexpr_var; + const int &ref_const_var = global_const_var; + + *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}} + *out = global_dev_var; + *out = global_constant_var; + *out = global_shared_var; + *out = global_constexpr_var; + *out = global_const_var; + + *out = ref_host_var; + *out = ref_dev_var; + *out = ref_constant_var; + *out = ref_shared_var; + *out = ref_constexpr_var; + *out = ref_const_var; +} + +__host__ __device__ void host_dev_fun(int *out) { + int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} + int &ref_dev_var = global_dev_var; + int &ref_constant_var = global_constant_var; + int &ref_shared_var = global_shared_var; + const int &ref_constexpr_var = global_constexpr_var; + const int &ref_const_var = global_const_var; + + *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} + *out = global_dev_var; + *out = global_constant_var; + *out = global_shared_var; + *out = global_constexpr_var; + *out = global_const_var; + + *out = ref_host_var; + *out = ref_dev_var; + *out = ref_constant_var; + *out = ref_shared_var; + *out = ref_constexpr_var; + *out = ref_const_var; +} + +inline __host__ __device__ void inline_host_dev_fun(int *out) { + int &ref_host_var = global_host_var; + int &ref_dev_var = global_dev_var; + int &ref_constant_var = global_constant_var; + int &ref_shared_var = global_shared_var; + const int &ref_constexpr_var = global_constexpr_var; + const int &ref_const_var = global_const_var; + + *out = global_host_var; + *out = global_dev_var; + *out = global_constant_var; + *out = global_shared_var; + *out = global_constexpr_var; + *out = global_const_var; + + *out = ref_host_var; + *out = ref_dev_var; + *out = ref_constant_var; + *out = ref_shared_var; + *out = ref_constexpr_var; + *out = ref_const_var; +} + +void dev_lambda_capture_by_ref(int *out) { + int &ref_host_var = global_host_var; + kernel<<<1,1>>>([&]() { + int &ref_dev_var = global_dev_var; + int &ref_constant_var = global_constant_var; + int &ref_shared_var = global_shared_var; + const int &ref_constexpr_var = global_constexpr_var; + const int &ref_const_var = global_const_var; + + *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} + // dev-error@-1 {{capture host variable 'out' by reference in device or host device lambda function}} + *out = global_dev_var; + *out = global_constant_var; + *out = global_shared_var; + *out = global_constexpr_var; + *out = global_const_var; + + *out = ref_host_var; // dev-error {{capture host variable 'ref_host_var' by reference in device or host device lambda function}} + *out = ref_dev_var; + *out = ref_constant_var; + *out = ref_shared_var; + *out = ref_constexpr_var; + *out = ref_const_var; + }); +} + +void dev_lambda_capture_by_copy(int *out) { + int &ref_host_var = global_host_var; + kernel<<<1,1>>>([=]() { + int &ref_dev_var = global_dev_var; + int &ref_constant_var = global_constant_var; + int &ref_shared_var = global_shared_var; + const int &ref_constexpr_var = global_constexpr_var; + const int &ref_const_var = global_const_var; + + *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} + *out = global_dev_var; + *out = global_constant_var; + *out = global_shared_var; + *out = global_constexpr_var; + *out = global_const_var; + + *out = ref_host_var; + *out = ref_dev_var; + *out = ref_constant_var; + *out = ref_shared_var; + *out = ref_constexpr_var; + *out = ref_const_var; + }); +} +