Index: include/clang/Basic/Builtins.h
===================================================================
--- include/clang/Basic/Builtins.h
+++ include/clang/Basic/Builtins.h
@@ -81,6 +81,11 @@
     return getRecord(ID).Type;
   }
 
+  /// \brief Return true if this function is a target-specific builtin.
+  bool isTSBuiltin(unsigned ID) const {
+    return ID >= Builtin::FirstTSBuiltin;
+  }
+
   /// \brief Return true if this function has no side effects and doesn't
   /// read memory.
   bool isConst(unsigned ID) const {
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -166,6 +166,7 @@
 LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")
 LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")
 LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")
+LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes")
 LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
 LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
Index: include/clang/Driver/CC1Options.td
===================================================================
--- include/clang/Driver/CC1Options.td
+++ include/clang/Driver/CC1Options.td
@@ -659,6 +659,8 @@
   HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">;
 def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
   HelpText<"Incorporate CUDA device-side binary into host object file.">;
+def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">,
+  HelpText<"Enable function overloads based on CUDA target attributes.">;
 
 } // let Flags = [CC1Option]
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -8594,6 +8594,17 @@
 
   CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
 
+  enum CUDAFunctionPreference {
+    CFP_Never,      // Invalid caller/callee combination.
+    CFP_LastResort, // Lowest priority. Only in effect if
+                    // LangOpts.CUDADisableTargetCallChecks is true.
+    CFP_Fallback,   // Low-priority caller/callee combination.
+    CFP_Best,       // Preferred caller/callee combination.
+  };
+
+  CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
+                                                const FunctionDecl *Callee);
+
   bool CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee);
 
   /// Given a implicit special member, infer its CUDA target from the
Index: lib/AST/ItaniumMangle.cpp
===================================================================
--- lib/AST/ItaniumMangle.cpp
+++ lib/AST/ItaniumMangle.cpp
@@ -498,6 +498,11 @@
     FunctionTypeDepth.pop(Saved);
   }
 
+  // __host__ __device__ functions co-exist with both __host__ and
+  // __device__ functions, so they need a different mangled name.
+  if (FD->hasAttr<CUDADeviceAttr>() && FD->hasAttr<CUDAHostAttr>())
+    Out << "Ua6deviceUa4host";
+
   // Whether the mangling of a function type includes the return type depends on
   // the context and the nature of the function. The rules for deciding whether
   // the return type is included are:
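To illustrate what the new mangling component buys us, here is a minimal sketch (not part of the patch; `f` is an invented name) of three same-signature overloads that can now coexist in one translation unit. The symbol names follow the CHECK lines of the codegen test added below:

    __host__ int f(void) { return 1; }            // host compilation emits _Z1fv
    __device__ int f(void) { return 2; }          // device compilation emits _Z1fv
    __host__ __device__ int f(void) { return 3; } // emitted as _Z1fUa6deviceUa4hostv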
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1412,6 +1412,9 @@
   if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
     Opts.CUDADisableTargetCallChecks = 1;
 
+  if (Args.hasArg(OPT_fcuda_target_overloads))
+    Opts.CUDATargetOverloads = 1;
+
   if (Opts.ObjC1) {
     if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
       StringRef value = arg->getValue();
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -60,8 +60,99 @@
   return CFT_Host;
 }
 
+// * CUDA Call preference table
+//
+// F  - from
+// T  - to
+// Ph - preference in host mode
+// Pd - preference in device mode
+// H  - handled in (x)
+// Preferences: b - best, f - fallback, l - last resort, n - never.
+//
+// | F  | T  | Ph | Pd | H     |           |
+// |----+----+----+----+-------+-----------|
+// | d  | d  | b  | b  | (b)   |           |
+// | d  | g  | n  | n  | (a)   |           |
+// | d  | h  | l  | l  | (e)   |           |
+// | d  | hd | f  | f  | (c)   |           |
+// | g  | d  | b  | b  | (b)   |           |
+// | g  | g  | n  | n  | (a)   | dyn exec? |
+// | g  | h  | l  | l  | (e)   | dyn exec? |
+// | g  | hd | f  | f  | (c)   |           |
+// | h  | d  | l  | l  | (e)   |           |
+// | h  | g  | b  | b  | (b)   |           |
+// | h  | h  | b  | b  | (b)   |           |
+// | h  | hd | f  | f  | (c)   |           |
+// | hd | d  | l  | f  | (d)   |           |
+// | hd | g  | f  | n  | (d/a) |           |
+// | hd | h  | f  | l  | (d)   |           |
+// | hd | hd | b  | b  | (b)   |           |
+
+Sema::CUDAFunctionPreference
+Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
+                             const FunctionDecl *Callee) {
+  assert(getLangOpts().CUDATargetOverloads &&
+         "Should not be called w/o enabled target overloads.");
+
+  assert(Callee && "Callee must be valid.");
+  CUDAFunctionTarget CallerTarget =
+                         Caller ? IdentifyCUDATarget(Caller) : Sema::CFT_Host,
+                     CalleeTarget = IdentifyCUDATarget(Callee);
+
+  // If one of the targets is invalid, the check always fails, no matter what
+  // the other target is.
+  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
+    return CFP_Never;
+
+  // (a) Can't call global from global until we support dynamic execution.
+  if (CalleeTarget == CFT_Global &&
+      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
+       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
+    return CFP_Never;
+
+  // (b) Best-case scenarios.
+  if (CalleeTarget == CallerTarget ||
+      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
+      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
+    return CFP_Best;
+
+  // (c) Calling HostDevice is OK as a fallback that works for everyone.
+  if (CalleeTarget == CFT_HostDevice)
+    return CFP_Fallback;
+
+  // Figure out what should be returned in 'last resort' cases. Normally
+  // those would not be allowed, but we'll consider them if
+  // CUDADisableTargetCallChecks is true.
+  CUDAFunctionPreference QuestionableResult =
+      getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
+
+  // (d) HostDevice behavior depends on the compilation mode.
+  if (CallerTarget == CFT_HostDevice) {
+    // Calling a function that matches the compilation mode is OK.
+    // Calling a function from the other side is frowned upon.
+    if (getLangOpts().CUDAIsDevice)
+      return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
+    else
+      return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
+                 ? CFP_Fallback
+                 : QuestionableResult;
+  }
+
+  // (e) Calling across the device/host boundary is not something you should do.
+  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
+      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
+      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
+    return QuestionableResult;
+
+  llvm_unreachable("All cases should've been handled by now.");
+}
+
 bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
                            const FunctionDecl *Callee) {
+  // With target overloads enabled, we only disallow calling
+  // combinations with CFP_Never.
+  if (getLangOpts().CUDATargetOverloads)
+    return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;
+
   // The CUDADisableTargetCallChecks short-circuits this check: we assume all
   // cross-target calls are valid.
   if (getLangOpts().CUDADisableTargetCallChecks)
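As a sketch of how the preference table plays out in overload resolution (invented names; the semantics mirror the SemaCUDA test added below), consider a __host__ and a __host__ __device__ overload of the same function:

    __host__ int pick(void) { return 1; }            // h -> h is CFP_Best
    __host__ __device__ int pick(void) { return 2; } // caller -> hd is CFP_Fallback

    __host__ void hcaller(void) { pick(); }   // prefers the __host__ overload
    __device__ void dcaller(void) { pick(); } // only the __host__ __device__ one is viable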
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -526,7 +526,7 @@
 
   // Since the target specific builtins for each arch overlap, only check those
   // of the arch we are compiling for.
-  if (BuiltinID >= Builtin::FirstTSBuiltin) {
+  if (Context.BuiltinInfo.isTSBuiltin(BuiltinID)) {
     switch (Context.getTargetInfo().getTriple().getArch()) {
     case llvm::Triple::arm:
     case llvm::Triple::armeb:
Index: lib/Sema/SemaDecl.cpp
===================================================================
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -5510,6 +5510,12 @@
     // In C++, the overloadable attribute negates the effects of extern "C".
     if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
       return false;
+
+    // So do CUDA's host/device attributes if overloading is enabled.
+    if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+        (D->template hasAttr<CUDADeviceAttr>() ||
+         D->template hasAttr<CUDAHostAttr>()))
+      return false;
   }
   return D->isExternC();
 }
@@ -11236,6 +11242,18 @@
       FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
     if (Context.BuiltinInfo.isConst(BuiltinID) && !FD->hasAttr<ConstAttr>())
       FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
+    if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads &&
+        Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
+        !FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAHostAttr>()) {
+      // Target-specific builtins are assumed to be intended for use
+      // in this particular CUDA compilation mode and should have the
+      // appropriate attribute set so that we can enforce CUDA function
+      // call restrictions.
+      if (getLangOpts().CUDAIsDevice)
+        FD->addAttr(CUDADeviceAttr::CreateImplicit(Context, FD->getLocation()));
+      else
+        FD->addAttr(CUDAHostAttr::CreateImplicit(Context, FD->getLocation()));
+    }
   }
 
   IdentifierInfo *Name = FD->getIdentifier();
Index: lib/Sema/SemaExprCXX.cpp
===================================================================
--- lib/Sema/SemaExprCXX.cpp
+++ lib/Sema/SemaExprCXX.cpp
@@ -2256,6 +2256,29 @@
            "found an unexpected usual deallocation function");
   }
 
+  // Filter out unsuitable CUDA functions. (Guarded on CUDATargetOverloads
+  // as well, since IdentifyCUDAPreference asserts that the flag is set.)
+  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
+    FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+    // Find the best call preference among the functions in Matches.
+    CUDAFunctionPreference BestCFP = CFP_Never;
+    for (auto const &Match : Matches) {
+      CUDAFunctionPreference P = IdentifyCUDAPreference(Caller, Match);
+      if (P > BestCFP)
+        BestCFP = P;
+    }
+
+    assert(BestCFP != CFP_Never && "No usable CUDA functions.");
+    // If any suitable functions were found, remove all items that are
+    // *not* suitable.
+    for (unsigned I = 0, N = Matches.size(); I != N;)
+      if (IdentifyCUDAPreference(Caller, Matches[I]) != BestCFP) {
+        Matches[I] = Matches[--N];
+        Matches.set_size(N);
+      } else {
+        ++I;
+      }
+  }
+
   assert(Matches.size() == 1 &&
          "unexpectedly have multiple usual deallocation functions");
   return Matches.front();
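The SemaDecl.cpp change above gives target-specific builtins an implicit CUDA target attribute. A hedged sketch of the effect, mirroring the new SemaCUDA/builtins.cu test below (assumes a device-side nvptx compilation; function names are invented):

    __device__ int tid(void) {
      // A target-specific builtin: implicitly marked __device__ in device
      // mode, so this call passes the CUDA target check.
      return __builtin_ptx_read_tid_x();
    }
    __host__ __device__ int myabs(int x) {
      // A generic builtin: gets no implicit target attribute and stays
      // callable from host and device code alike.
      return __builtin_abs(x);
    }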
Index: lib/Sema/SemaOverload.cpp
===================================================================
--- lib/Sema/SemaOverload.cpp
+++ lib/Sema/SemaOverload.cpp
@@ -1067,6 +1067,11 @@
     return true;
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+    // Allow overloading of functions with the same signature but
+    // different CUDA target attributes.
+    return IdentifyCUDATarget(New) != IdentifyCUDATarget(Old);
+
   // The signatures match; this is not an overload.
   return false;
 }
@@ -8503,6 +8508,13 @@
     return true;
   }
 
+  if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+      Cand1.Function && Cand2.Function) {
+    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+           S.IdentifyCUDAPreference(Caller, Cand2.Function);
+  }
+
   return false;
 }
 
@@ -9920,6 +9932,10 @@
         EliminateAllExceptMostSpecializedTemplate();
       }
     }
+
+    if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+        Matches.size() > 1)
+      EliminateSuboptimalCudaMatches();
   }
 
 private:
@@ -10100,6 +10116,32 @@
     }
   }
 
+  void EliminateSuboptimalCudaMatches() {
+    assert(S.getLangOpts().CUDATargetOverloads &&
+           "Should not be called w/o enabled target overloads.");
+
+    // Find the best call preference among the functions in Matches.
+    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+    Sema::CUDAFunctionPreference BestCFP = Sema::CFP_Never;
+    for (auto const &Match : Matches) {
+      Sema::CUDAFunctionPreference P =
+          S.IdentifyCUDAPreference(Caller, Match.second);
+      if (P > BestCFP)
+        BestCFP = P;
+    }
+
+    assert(BestCFP != Sema::CFP_Never && "No usable CUDA functions.");
+    // If any suitable functions were found, remove all items that are
+    // *not* suitable.
+    for (unsigned I = 0, N = Matches.size(); I != N;)
+      if (S.IdentifyCUDAPreference(Caller, Matches[I].second) != BestCFP) {
+        Matches[I] = Matches[--N];
+        Matches.set_size(N);
+      } else {
+        ++I;
+      }
+  }
+
 public:
   void ComplainNoMatchesFound() const {
     assert(Matches.empty());
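Because candidate comparison and address-of-overloaded-function resolution now consult IdentifyCUDAPreference, taking the address of an overloaded function also resolves by target. A sketch (invented names; behavior follows the tests below):

    __host__ int f(void);
    __device__ int f(void);

    __host__ void hg(void) {
      int (*fp)(void) = f; // binds to the __host__ f (h -> h is CFP_Best)
    }
    __device__ void dg(void) {
      int (*fp)(void) = f; // binds to the __device__ f in device compilation
    }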
Index: test/CodeGenCUDA/function-overload.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/function-overload.cu
@@ -0,0 +1,136 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// Make sure we handle target overloads correctly.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:     -fcuda-target-overloads -emit-llvm -o - %s \
+// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
+// RUN:     -fcuda-target-overloads -emit-llvm -o - %s \
+// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
+
+// Check target overloads handling with disabled call target checks.
+// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:     -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \
+// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \
+// RUN:       -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s
+// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \
+// RUN:     -fcuda-disable-target-call-checks -fcuda-target-overloads \
+// RUN:     -fcuda-is-device -o - %s \
+// RUN:     | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
+// RUN:       -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s
+
+#include "Inputs/cuda.h"
+
+typedef int (*fp_t)(void);
+
+// CHECK-BOTH-LABEL: define i32 @_Z4dhhdv()
+__device__ int dhhd(void) { return 1; }
+// CHECK-DEVICE: ret i32 1
+
+__host__ int dhhd(void) { return 2; }
+// CHECK-HOST: ret i32 2
+
+// CHECK-BOTH-LABEL: define i32 @_Z4dhhdUa6deviceUa4hostv()
+__host__ __device__ int dhhd(void) { return 3; }
+// CHECK-BOTH: ret i32 3
+
+// CHECK-HOST-LABEL: define i32 @_Z3hhdv()
+__host__ int hhd(void) { return 4; }
+// CHECK-HOST: ret i32 4
+
+// CHECK-BOTH-LABEL: define i32 @_Z3dhdUa6deviceUa4hostv()
+__host__ __device__ int dhd(void) { return 5; }
+// CHECK-BOTH: ret i32 5
+
+// CHECK-DEVICE-LABEL: define i32 @_Z3dhdv()
+__device__ int dhd(void) { return 6; }
+// CHECK-DEVICE: ret i32 6
+
+// CHECK-BOTH-LABEL: define i32 @_Z3hhdUa6deviceUa4hostv()
+__host__ __device__ int hhd(void) { return 7; }
+// CHECK-BOTH: ret i32 7
+
+// CHECK-DEVICE-LABEL: define i32 @_Z1dv()
+__device__ int d(void) { return 8; }
+// CHECK-DEVICE: ret i32 8
+
+// CHECK-HOST-LABEL: define i32 @_Z1hv()
+__host__ int h(void) { return 9; }
+// CHECK-HOST: ret i32 9
+
+// Mangled names of extern "C" __host__ __device__ functions clash
+// with those of their __host__/__device__ counterparts, so
+// overloading of extern "C" functions can only happen for __host__
+// and __device__ functions -- we never codegen them in the same
+// compilation, and therefore a mangled-name conflict is not a problem.
+
+// CHECK-DEVICE-LABEL: define i32 @chd()
+extern "C" __device__ int chd(void) { return 10; }
+// CHECK-DEVICE: ret i32 10
+
+// CHECK-HOST-LABEL: define i32 @chd()
+extern "C" __host__ int chd(void) { return 11; }
+// CHECK-HOST: ret i32 11
+
+
+// CHECK-HOST-LABEL: define void @_Z5hostfv()
+__host__ void hostf(void) {
+  fp_t dhddp = dhhd; // CHECK-HOST: store {{.*}} @_Z4dhhdv, {{.*}} %dhddp,
+  fp_t hhdp = hhd;   // CHECK-HOST: store {{.*}} @_Z3hhdv, {{.*}} %hhdp,
+  fp_t dhdp = dhd;   // CHECK-HOST: store {{.*}} @_Z3dhdUa6deviceUa4hostv, {{.*}} %dhdp,
+#if defined(NOCHECKS)
+  fp_t dp = d;       // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
+#endif
+  fp_t hp = h;       // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
+  fp_t chdp = chd;   // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
+
+  dhhd(); // CHECK-HOST: call i32 @_Z4dhhdv()
+  hhd();  // CHECK-HOST: call i32 @_Z3hhdv()
+  dhd();  // CHECK-HOST: call i32 @_Z3dhdUa6deviceUa4hostv()
+  h();    // CHECK-HOST: call i32 @_Z1hv()
+  chd();  // CHECK-HOST: call i32 @chd()
+}
+
+// CHECK-DEVICE-LABEL: define void @_Z7devicefv()
+__device__ void devicef(void) {
+  fp_t dhddp = dhhd; // CHECK-DEVICE: store {{.*}} @_Z4dhhdv, {{.*}} %dhddp,
+  fp_t hhdp = hhd;   // CHECK-DEVICE: store {{.*}} @_Z3hhdUa6deviceUa4hostv, {{.*}} %hhdp,
+  fp_t dhdp = dhd;   // CHECK-DEVICE: store {{.*}} @_Z3dhdv, {{.*}} %dhdp,
+  fp_t dp = d;       // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
+#if defined(NOCHECKS)
+  fp_t hp = h;       // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
+#endif
+  fp_t chdp = chd;   // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,
+
+  dhhd(); // CHECK-DEVICE: call i32 @_Z4dhhdv()
+  hhd();  // CHECK-DEVICE: call i32 @_Z3hhdUa6deviceUa4hostv()
+  dhd();  // CHECK-DEVICE: call i32 @_Z3dhdv()
+  d();    // CHECK-DEVICE: call i32 @_Z1dv()
+  chd();  // CHECK-DEVICE: call i32 @chd()
+}
+
+// CHECK-BOTH-LABEL: define void @_Z11hostdevicefUa6deviceUa4hostv()
+__host__ __device__ void hostdevicef(void) {
+  fp_t dhddp = dhhd; // CHECK-BOTH: store {{.*}} @_Z4dhhdUa6deviceUa4hostv, {{.*}} %dhddp,
+  fp_t hhdp = hhd;   // CHECK-BOTH: store {{.*}} @_Z3hhdUa6deviceUa4hostv, {{.*}} %hhdp,
+  fp_t dhdp = dhd;   // CHECK-BOTH: store {{.*}} @_Z3dhdUa6deviceUa4hostv, {{.*}} %dhdp,
+#if defined(NOCHECKS)
+  fp_t dp = d;       // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
+  fp_t hp = h;       // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
+#endif
+  // chd would be __host__ or __device__ depending on the compilation mode.
+  fp_t chdp = chd;   // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp,
+
+  dhhd(); // CHECK-BOTH: call i32 @_Z4dhhdUa6deviceUa4hostv()
+  hhd();  // CHECK-BOTH: call i32 @_Z3hhdUa6deviceUa4hostv()
+  dhd();  // CHECK-BOTH: call i32 @_Z3dhdUa6deviceUa4hostv()
+
+#if defined(NOCHECKS) || defined(__CUDA_ARCH__)
+  d();    // CHECK-BOTH-NC: call i32 @_Z1dv()
+#endif
+#if defined(NOCHECKS) || !defined(__CUDA_ARCH__)
+  h();    // CHECK-BOTH-NC: call i32 @_Z1hv()
+#endif
+  chd();  // CHECK-BOTH: call i32 @chd()
+}
Index: test/SemaCUDA/builtins.cu
===================================================================
--- /dev/null
+++ test/SemaCUDA/builtins.cu
@@ -0,0 +1,36 @@
+// Tests that target-specific builtins have appropriate host/device
+// attributes and that CUDA call restrictions are enforced. Also
+// verifies that non-target builtins can be used from both host and
+// device functions.
+//
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown \
+// RUN:     -fcuda-target-overloads -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-cuda -fcuda-is-device \
+// RUN:     -fcuda-target-overloads -fsyntax-only -verify %s
+
+
+#ifdef __CUDA_ARCH__
+// Device-side builtins are not allowed to be called from host functions.
+void hf() {
+  int x = __builtin_ptx_read_tid_x(); // expected-note {{'__builtin_ptx_read_tid_x' declared here}}
+  // expected-error@-1 {{reference to __device__ function '__builtin_ptx_read_tid_x' in __host__ function}}
+  x = __builtin_abs(1);
+}
+__attribute__((device)) void df() {
+  int x = __builtin_ptx_read_tid_x();
+  x = __builtin_abs(1);
+}
+#else
+// Host-side builtins are not allowed to be called from device functions.
+__attribute__((device)) void df() {
+  int x = __builtin_ia32_rdtsc(); // expected-note {{'__builtin_ia32_rdtsc' declared here}}
+  // expected-error@-1 {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}}
+  x = __builtin_abs(1);
+}
+void hf() {
+  int x = __builtin_ia32_rdtsc();
+  x = __builtin_abs(1);
+}
+#endif
Index: test/SemaCUDA/function-overload.cu
===================================================================
--- /dev/null
+++ test/SemaCUDA/function-overload.cu
@@ -0,0 +1,173 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// Make sure we handle target overloads correctly.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:     -fsyntax-only -fcuda-target-overloads -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:     -fsyntax-only -fcuda-target-overloads -fcuda-is-device -verify %s
+
+// Check target overloads handling with disabled call target checks.
+// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -fsyntax-only \
+// RUN:     -fcuda-disable-target-call-checks -fcuda-target-overloads -verify %s
+// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -fsyntax-only \
+// RUN:     -fcuda-disable-target-call-checks -fcuda-target-overloads \
+// RUN:     -fcuda-is-device -verify %s
+
+#include "Inputs/cuda.h"
+
+typedef int (*fp_t)(void);
+
+__device__ int dhhd(void) { return 2; }
+__host__ int dhhd(void) { return 1; } // expected-note {{previous definition is here}}
+int dhhd(void) { return 1; } // expected-error {{redefinition of 'dhhd'}}
+__host__ __device__ int dhhd(void) { return 3; }
+
+__host__ int hhd(void) { return 4; }
+__host__ __device__ int dhd(void) { return 5; }
+
+__device__ int dhd(void) { return 6; }
+__host__ __device__ int hhd(void) { return 7; }
+
+__device__ int d(void) { return 8; }
+__host__ int h(void) { return 9; }
+__global__ void g(void) {}
+
+extern "C" __device__ int chd(void) { return 10; }
+extern "C" __host__ int chd(void) { return 11; } // expected-note {{previous definition is here}}
+extern "C" int chd(void) { return 11; } // expected-error {{redefinition of 'chd'}}
+extern "C" __host__ __device__ int chd(void) { return 12; } // expected-note {{previous definition is here}}
+extern "C" __host__ __device__ int chd(void) { return 13; } // expected-error {{redefinition of 'chd'}}
+
+__host__ void hostf(void) {
+  fp_t dhddp = dhhd;
+  fp_t hhdp = hhd;
+  fp_t dhdp = dhd;
+  fp_t dp = d;
+#if !defined(NOCHECKS)
+  // expected-error@-2 {{reference to __device__ function 'd' in __host__ function}}
+  // expected-note@32 {{'d' declared here}}
+#endif
+  fp_t hp = h;
+
+  dhhd();
+  hhd();
+  dhd();
+  chd();
+  d();
+#if !defined(NOCHECKS)
+  // expected-error@-2 {{no matching function for call to 'd'}}
+  // expected-note@32 {{candidate function not viable: call to __device__ function from __host__ function}}
+#endif
+  h();
+
+  g(); // expected-error {{call to global function g not configured}}
+  g<<<0,0>>>();
+}
+
+__device__ void devicef(void) {
+  fp_t dhddp = dhhd;
+  fp_t hhdp = hhd;
+  fp_t dhdp = dhd;
+  fp_t dp = d;
+  fp_t hp = h;
+#if !defined(NOCHECKS)
+  // expected-error@-2 {{reference to __host__ function 'h' in __device__ function}}
+  // expected-note@33 {{'h' declared here}}
+#endif
+
+  dhhd();
+  hhd();
+  dhd();
+  chd();
+  d();
+  h();
+  g();
+#if !defined(NOCHECKS)
+  // expected-error@-3 {{no matching function for call to 'h'}}
+  // expected-note@33 {{candidate function not viable: call to __host__ function from __device__ function}}
+#endif
+  // expected-error@-5 {{no matching function for call to 'g'}}
+  // expected-note@34 {{candidate function not viable: call to __global__ function from __device__ function}}
+  g<<<0,0>>>();
+  // expected-error@-1 {{reference to __global__ function 'g' in __device__ function}}
+  // expected-note@34 {{'g' declared here}}
+}
+
+__global__ void globalf(void) {
+  fp_t dhddp = dhhd;
+  fp_t hhdp = hhd;
+  fp_t dhdp = dhd;
+  fp_t dp = d;
+  fp_t hp = h;
+#if !defined(NOCHECKS)
+  // expected-error@-2 {{reference to __host__ function 'h' in __global__ function}}
+  // expected-note@33 {{'h' declared here}}
+#endif
+
+  dhhd();
+  hhd();
+  dhd();
+  chd();
+  d();
+  h();
+#if !defined(NOCHECKS)
+  // expected-error@-2 {{no matching function for call to 'h'}}
+  // expected-note@33 {{candidate function not viable: call to __host__ function from __global__ function}}
+#endif
+  g();
+  // expected-error@-1 {{no matching function for call to 'g'}}
+  // expected-note@34 {{candidate function not viable: call to __global__ function from __global__ function}}
+  g<<<0,0>>>();
+  // expected-error@-1 {{reference to __global__ function 'g' in __global__ function}}
+  // expected-note@34 {{'g' declared here}}
+
+}
+
+__host__ __device__ void hostdevicef(void) {
+  fp_t dhddp = dhhd;
+  fp_t hhdp = hhd;
+  fp_t dhdp = dhd;
+
+  fp_t dp = d;
+  fp_t hp = h;
+#if !defined(NOCHECKS)
+#if !defined(__CUDA_ARCH__)
+  // expected-error@-4 {{reference to __device__ function 'd' in __host__ __device__ function}}
+  // expected-note@32 {{'d' declared here}}
+#else
+  // expected-error@-6 {{reference to __host__ function 'h' in __host__ __device__ function}}
+  // expected-note@33 {{'h' declared here}}
+#endif
+#endif
+
+  dhhd();
+  hhd();
+  dhd();
+  chd();
+
+  d();
+  h();
+  g();
+  g<<<0,0>>>();
+#if !defined(__CUDA_ARCH__)
+#if !defined(NOCHECKS)
+  // expected-error@-6 {{no matching function for call to 'd'}}
+  // expected-note@32 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
+#endif
+  // expected-error@-7 {{call to global function g not configured}}
+#else
+#if !defined(NOCHECKS)
+  // expected-error@-11 {{no matching function for call to 'h'}}
+  // expected-note@33 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
+#endif
+  // expected-error@-13 {{no matching function for call to 'g'}}
+  // expected-note@34 {{candidate function not viable: call to __global__ function from __host__ __device__ function}}
+  // expected-error@-14 {{reference to __global__ function 'g' in __host__ __device__ function}}
+  // expected-note@34 {{'g' declared here}}
+#endif // __CUDA_ARCH__
+}
+
+// Test for address-of-overloaded-function resolution in the global context.
+typedef int (*fp_t)(void);
+fp_t dhhdp = dhhd;
Index: test/SemaCUDA/implicit-intrinsic.cu
===================================================================
--- test/SemaCUDA/implicit-intrinsic.cu
+++ test/SemaCUDA/implicit-intrinsic.cu
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -std=gnu++11 -triple nvptx64-unknown-unknown -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -fsyntax-only -verify %s
 
 #include "Inputs/cuda.h"
 
 // expected-no-diagnostics
 __device__ void __threadfence_system() {
-  // This shouldn't produce an error, since __nvvm_membar_sys is inferred to
-  // be __host__ __device__ and thus callable from device code.
+  // This shouldn't produce an error, since __nvvm_membar_sys should be
+  // __device__ and thus callable from device code.
   __nvvm_membar_sys();
 }