Index: cfe/trunk/include/clang/Basic/LangOptions.def =================================================================== --- cfe/trunk/include/clang/Basic/LangOptions.def +++ cfe/trunk/include/clang/Basic/LangOptions.def @@ -166,6 +166,7 @@ LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device") LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions") LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)") +LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes") LANGOPT(CUDAUsesLibDevice , 1, 0, "Selectively link and internalize bitcode.") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") Index: cfe/trunk/include/clang/Driver/CC1Options.td =================================================================== --- cfe/trunk/include/clang/Driver/CC1Options.td +++ cfe/trunk/include/clang/Driver/CC1Options.td @@ -659,6 +659,8 @@ HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">; def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">, HelpText<"Incorporate CUDA device-side binary into host object file.">; +def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">, + HelpText<"Enable function overloads based on CUDA target attributes.">; def fcuda_uses_libdevice : Flag<["-"], "fcuda-uses-libdevice">, HelpText<"Selectively link and internalize bitcode.">; Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -8613,8 +8613,37 @@ CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D); + enum CUDAFunctionPreference { + CFP_Never, // Invalid caller/callee combination. + CFP_LastResort, // Lowest priority. Only in effect if + // LangOpts.CUDADisableTargetCallChecks is true. + CFP_Fallback, // Low priority caller/callee combination + CFP_Best, // Preferred caller/callee combination + }; + + /// Identifies relative preference of a given Caller/Callee + /// combination, based on their host/device attributes. + /// \param Caller function which needs address of \p Callee. + /// nullptr in case of global context. + /// \param Callee target function + /// + /// \returns preference value for particular Caller/Callee combination. + CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, + const FunctionDecl *Callee); + bool CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee); + /// Finds a function in \p Matches with highest calling priority + /// from \p Caller context and erases all functions with lower + /// calling priority. + void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, + SmallVectorImpl &Matches); + void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, + SmallVectorImpl &Matches); + void EraseUnwantedCUDAMatches( + const FunctionDecl *Caller, + SmallVectorImpl> &Matches); + /// Given a implicit special member, infer its CUDA target from the /// calls it needs to make to underlying base/field special members. /// \param ClassDecl the class for which the member is being created. Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -1416,6 +1416,9 @@ if (Args.hasArg(OPT_fcuda_disable_target_call_checks)) Opts.CUDADisableTargetCallChecks = 1; + if (Args.hasArg(OPT_fcuda_target_overloads)) + Opts.CUDATargetOverloads = 1; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -60,8 +60,101 @@ return CFT_Host; } +// * CUDA Call preference table +// +// F - from, +// T - to +// Ph - preference in host mode +// Pd - preference in device mode +// H - handled in (x) +// Preferences: b-best, f-fallback, l-last resort, n-never. +// +// | F | T | Ph | Pd | H | +// |----+----+----+----+-----+ +// | d | d | b | b | (b) | +// | d | g | n | n | (a) | +// | d | h | l | l | (e) | +// | d | hd | f | f | (c) | +// | g | d | b | b | (b) | +// | g | g | n | n | (a) | +// | g | h | l | l | (e) | +// | g | hd | f | f | (c) | +// | h | d | l | l | (e) | +// | h | g | b | b | (b) | +// | h | h | b | b | (b) | +// | h | hd | f | f | (c) | +// | hd | d | l | f | (d) | +// | hd | g | f | n |(d/a)| +// | hd | h | f | l | (d) | +// | hd | hd | b | b | (b) | + +Sema::CUDAFunctionPreference +Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, + const FunctionDecl *Callee) { + assert(getLangOpts().CUDATargetOverloads && + "Should not be called w/o enabled target overloads."); + + assert(Callee && "Callee must be valid."); + CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); + CUDAFunctionTarget CallerTarget = + (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host; + + // If one of the targets is invalid, the check always fails, no matter what + // the other target is. + if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) + return CFP_Never; + + // (a) Can't call global from some contexts until we support CUDA's + // dynamic parallelism. + if (CalleeTarget == CFT_Global && + (CallerTarget == CFT_Global || CallerTarget == CFT_Device || + (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice))) + return CFP_Never; + + // (b) Best case scenarios + if (CalleeTarget == CallerTarget || + (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || + (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) + return CFP_Best; + + // (c) Calling HostDevice is OK as a fallback that works for everyone. + if (CalleeTarget == CFT_HostDevice) + return CFP_Fallback; + + // Figure out what should be returned 'last resort' cases. Normally + // those would not be allowed, but we'll consider them if + // CUDADisableTargetCallChecks is true. + CUDAFunctionPreference QuestionableResult = + getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never; + + // (d) HostDevice behavior depends on compilation mode. + if (CallerTarget == CFT_HostDevice) { + // Calling a function that matches compilation mode is OK. + // Calling a function from the other side is frowned upon. + if (getLangOpts().CUDAIsDevice) + return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult; + else + return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global) + ? CFP_Fallback + : QuestionableResult; + } + + // (e) Calling across device/host boundary is not something you should do. + if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || + (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || + (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) + return QuestionableResult; + + llvm_unreachable("All cases should've been handled by now."); +} + bool Sema::CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee) { + // With target overloads enabled, we only disallow calling + // combinations with CFP_Never. + if (getLangOpts().CUDATargetOverloads) + return IdentifyCUDAPreference(Caller,Callee) == CFP_Never; + // The CUDADisableTargetCallChecks short-circuits this check: we assume all // cross-target calls are valid. if (getLangOpts().CUDADisableTargetCallChecks) @@ -117,6 +210,57 @@ return false; } +template +static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller, + llvm::SmallVectorImpl &Matches, + FetchDeclFn FetchDecl) { + assert(S.getLangOpts().CUDATargetOverloads && + "Should not be called w/o enabled target overloads."); + if (Matches.size() <= 1) + return; + + // Find the best call preference among the functions in Matches. + Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never; + for (auto const &Match : Matches) { + P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match)); + if (P > BestCFP) + BestCFP = P; + } + + // Erase all functions with lower priority. + for (unsigned I = 0, N = Matches.size(); I != N;) + if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) { + Matches[I] = Matches[--N]; + Matches.resize(N); + } else { + ++I; + } +} + +void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller, + SmallVectorImpl &Matches){ + EraseUnwantedCUDAMatchesImpl( + *this, Caller, Matches, [](const FunctionDecl *item) { return item; }); +} + +void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller, + SmallVectorImpl &Matches) { + EraseUnwantedCUDAMatchesImpl( + *this, Caller, Matches, [](const DeclAccessPair &item) { + return dyn_cast(item.getDecl()); + }); +} + +void Sema::EraseUnwantedCUDAMatches( + const FunctionDecl *Caller, + SmallVectorImpl> &Matches){ + EraseUnwantedCUDAMatchesImpl>( + *this, Caller, Matches, + [](const std::pair &item) { + return dyn_cast(item.second); + }); +} + /// When an implicitly-declared special member has to invoke more than one /// base/field special member, conflicts may occur in the targets of these /// members. For example, if one base's member __host__ and another's is Index: cfe/trunk/lib/Sema/SemaDecl.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaDecl.cpp +++ cfe/trunk/lib/Sema/SemaDecl.cpp @@ -5515,6 +5515,12 @@ // In C++, the overloadable attribute negates the effects of extern "C". if (!D->isInExternCContext() || D->template hasAttr()) return false; + + // So do CUDA's host/device attributes if overloading is enabled. + if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads && + (D->template hasAttr() || + D->template hasAttr())) + return false; } return D->isExternC(); } Index: cfe/trunk/lib/Sema/SemaExprCXX.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaExprCXX.cpp +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp @@ -2265,6 +2265,9 @@ "found an unexpected usual deallocation function"); } + if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) + EraseUnwantedCUDAMatches(dyn_cast(CurContext), Matches); + assert(Matches.size() == 1 && "unexpectedly have multiple usual deallocation functions"); return Matches.front(); @@ -2296,6 +2299,9 @@ Matches.push_back(F.getPair()); } + if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) + EraseUnwantedCUDAMatches(dyn_cast(CurContext), Matches); + // There's exactly one suitable operator; pick it. if (Matches.size() == 1) { Operator = cast(Matches[0]->getUnderlyingDecl()); Index: cfe/trunk/lib/Sema/SemaOverload.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaOverload.cpp +++ cfe/trunk/lib/Sema/SemaOverload.cpp @@ -1072,6 +1072,25 @@ return true; } + if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) { + CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New), + OldTarget = IdentifyCUDATarget(Old); + if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global) + return false; + + assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target."); + + // Don't allow mixing of HD with other kinds. This guarantees that + // we have only one viable function with this signature on any + // side of CUDA compilation . + if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice)) + return false; + + // Allow overloading of functions with same signature, but + // different CUDA target attributes. + return NewTarget != OldTarget; + } + // The signatures match; this is not an overload. return false; } @@ -8508,6 +8527,13 @@ return true; } + if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads && + Cand1.Function && Cand2.Function) { + FunctionDecl *Caller = dyn_cast(S.CurContext); + return S.IdentifyCUDAPreference(Caller, Cand1.Function) > + S.IdentifyCUDAPreference(Caller, Cand2.Function); + } + return false; } @@ -9925,6 +9951,10 @@ EliminateAllExceptMostSpecializedTemplate(); } } + + if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads && + Matches.size() > 1) + EliminateSuboptimalCudaMatches(); } private: @@ -10100,11 +10130,15 @@ ++I; else { Matches[I] = Matches[--N]; - Matches.set_size(N); + Matches.resize(N); } } } + void EliminateSuboptimalCudaMatches() { + S.EraseUnwantedCUDAMatches(dyn_cast(S.CurContext), Matches); + } + public: void ComplainNoMatchesFound() const { assert(Matches.empty()); Index: cfe/trunk/test/CodeGenCUDA/function-overload.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/function-overload.cu +++ cfe/trunk/test/CodeGenCUDA/function-overload.cu @@ -0,0 +1,214 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// Make sure we handle target overloads correctly. +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -fcuda-target-overloads -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -fcuda-target-overloads -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s + +// Check target overloads handling with disabled call target checks. +// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \ +// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s +// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \ +// RUN: -fcuda-is-device -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \ +// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s + +#include "Inputs/cuda.h" + +typedef int (*fp_t)(void); +typedef void (*gp_t)(void); + +// CHECK-HOST: @hp = global i32 ()* @_Z1hv +// CHECK-HOST: @chp = global i32 ()* @ch +// CHECK-HOST: @dhp = global i32 ()* @_Z2dhv +// CHECK-HOST: @cdhp = global i32 ()* @cdh +// CHECK-HOST: @gp = global void ()* @_Z1gv + +// CHECK-BOTH-LABEL: define i32 @_Z2dhv() +__device__ int dh(void) { return 1; } +// CHECK-DEVICE: ret i32 1 +__host__ int dh(void) { return 2; } +// CHECK-HOST: ret i32 2 + +// CHECK-BOTH-LABEL: define i32 @_Z2hdv() +__host__ __device__ int hd(void) { return 3; } +// CHECK-BOTH: ret i32 3 + +// CHECK-DEVICE-LABEL: define i32 @_Z1dv() +__device__ int d(void) { return 8; } +// CHECK-DEVICE: ret i32 8 + +// CHECK-HOST-LABEL: define i32 @_Z1hv() +__host__ int h(void) { return 9; } +// CHECK-HOST: ret i32 9 + +// CHECK-BOTH-LABEL: define void @_Z1gv() +__global__ void g(void) {} +// CHECK-BOTH: ret void + +// mangled names of extern "C" __host__ __device__ functions clash +// with those of their __host__/__device__ counterparts, so +// overloading of extern "C" functions can only happen for __host__ +// and __device__ functions -- we never codegen them in the same +// compilation and therefore mangled name conflict is not a problem. + +// CHECK-BOTH-LABEL: define i32 @cdh() +extern "C" __device__ int cdh(void) {return 10;} +// CHECK-DEVICE: ret i32 10 +extern "C" __host__ int cdh(void) {return 11;} +// CHECK-HOST: ret i32 11 + +// CHECK-DEVICE-LABEL: define i32 @cd() +extern "C" __device__ int cd(void) {return 12;} +// CHECK-DEVICE: ret i32 12 + +// CHECK-HOST-LABEL: define i32 @ch() +extern "C" __host__ int ch(void) {return 13;} +// CHECK-HOST: ret i32 13 + +// CHECK-BOTH-LABEL: define i32 @chd() +extern "C" __host__ __device__ int chd(void) {return 14;} +// CHECK-BOTH: ret i32 14 + +// CHECK-HOST-LABEL: define void @_Z5hostfv() +__host__ void hostf(void) { +#if defined (NOCHECKS) + fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp, +#endif + fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp, + fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp, + fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp, + fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp, + fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp, + gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp, + +#if defined (NOCHECKS) + d(); // CHECK-HOST-NC: call i32 @_Z1dv() + cd(); // CHECK-HOST-NC: call i32 @cd() +#endif + h(); // CHECK-HOST: call i32 @_Z1hv() + ch(); // CHECK-HOST: call i32 @ch() + dh(); // CHECK-HOST: call i32 @_Z2dhv() + cdh(); // CHECK-HOST: call i32 @cdh() + g<<<0,0>>>(); // CHECK-HOST: call void @_Z1gv() +} + +// CHECK-DEVICE-LABEL: define void @_Z7devicefv() +__device__ void devicef(void) { + fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp, +#if defined (NOCHECKS) + fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp, +#endif + fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp, + fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp, + fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp, + fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp, + + d(); // CHECK-DEVICE: call i32 @_Z1dv() + cd(); // CHECK-DEVICE: call i32 @cd() +#if defined (NOCHECKS) + h(); // CHECK-DEVICE-NC: call i32 @_Z1hv() + ch(); // CHECK-DEVICE-NC: call i32 @ch() +#endif + dh(); // CHECK-DEVICE: call i32 @_Z2dhv() + cdh(); // CHECK-DEVICE: call i32 @cdh() +} + +// CHECK-BOTH-LABEL: define void @_Z11hostdevicefv() +__host__ __device__ void hostdevicef(void) { +#if defined (NOCHECKS) + fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp, + fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp, +#endif + fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp, + fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp, + fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp, + fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp, +#if defined (NOCHECKS) && !defined(__CUDA_ARCH__) + gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp, +#endif + +#if defined (NOCHECKS) + d(); // CHECK-BOTH-NC: call i32 @_Z1dv() + cd(); // CHECK-BOTH-NC: call i32 @cd() + h(); // CHECK-BOTH-NC: call i32 @_Z1hv() + ch(); // CHECK-BOTH-NC: call i32 @ch() +#endif + dh(); // CHECK-BOTH: call i32 @_Z2dhv() + cdh(); // CHECK-BOTH: call i32 @cdh() +#if defined (NOCHECKS) && !defined(__CUDA_ARCH__) + g<<<0,0>>>(); // CHECK-HOST-NC: call void @_Z1gv() +#endif +} + +// Test for address of overloaded function resolution in the global context. +fp_t hp = h; +fp_t chp = ch; +fp_t dhp = dh; +fp_t cdhp = cdh; +gp_t gp = g; + +int x; +// Check constructors/destructors for D/H functions +struct s_cd_dh { + __host__ s_cd_dh() { x = 11; } + __device__ s_cd_dh() { x = 12; } + __host__ ~s_cd_dh() { x = 21; } + __device__ ~s_cd_dh() { x = 22; } +}; + +struct s_cd_hd { + __host__ __device__ s_cd_hd() { x = 31; } + __host__ __device__ ~s_cd_hd() { x = 32; } +}; + +// CHECK-BOTH: define void @_Z7wrapperv +#if defined(__CUDA_ARCH__) +__device__ +#else +__host__ +#endif +void wrapper() { + s_cd_dh scddh; + // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev( + s_cd_hd scdhd; + // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev + + // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev( + // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev( +} +// CHECK-BOTH: ret void + +// Now it's time to check what's been generated for the methods we used. + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev( +// CHECK-HOST: store i32 11, +// CHECK-DEVICE: store i32 12, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev( +// CHECK-BOTH: store i32 31, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev( +// CHECK-BOTH: store i32 32, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev( +// CHECK-HOST: store i32 21, +// CHECK-DEVICE: store i32 22, +// CHECK-BOTH: ret void + Index: cfe/trunk/test/SemaCUDA/function-overload.cu =================================================================== --- cfe/trunk/test/SemaCUDA/function-overload.cu +++ cfe/trunk/test/SemaCUDA/function-overload.cu @@ -0,0 +1,317 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// Make sure we handle target overloads correctly. +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -fsyntax-only -fcuda-target-overloads -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ +// RUN: -fsyntax-only -fcuda-target-overloads -fcuda-is-device -verify %s + +// Check target overloads handling with disabled call target checks. +// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -verify %s +// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \ +// RUN: -fcuda-is-device -verify %s + +#include "Inputs/cuda.h" + +typedef int (*fp_t)(void); +typedef void (*gp_t)(void); + +// Host and unattributed functions can't be overloaded +__host__ int hh(void) { return 1; } // expected-note {{previous definition is here}} +int hh(void) { return 1; } // expected-error {{redefinition of 'hh'}} + +// H/D overloading is OK +__host__ int dh(void) { return 2; } +__device__ int dh(void) { return 2; } + +// H/HD and D/HD are not allowed +__host__ __device__ int hdh(void) { return 5; } // expected-note {{previous definition is here}} +__host__ int hdh(void) { return 4; } // expected-error {{redefinition of 'hdh'}} + +__host__ int hhd(void) { return 4; } // expected-note {{previous definition is here}} +__host__ __device__ int hhd(void) { return 5; } // expected-error {{redefinition of 'hhd'}} +// expected-warning@-1 {{attribute declaration must precede definition}} +// expected-note@-3 {{previous definition is here}} + +__host__ __device__ int hdd(void) { return 7; } // expected-note {{previous definition is here}} +__device__ int hdd(void) { return 6; } // expected-error {{redefinition of 'hdd'}} + +__device__ int dhd(void) { return 6; } // expected-note {{previous definition is here}} +__host__ __device__ int dhd(void) { return 7; } // expected-error {{redefinition of 'dhd'}} +// expected-warning@-1 {{attribute declaration must precede definition}} +// expected-note@-3 {{previous definition is here}} + +// Same tests for extern "C" functions +extern "C" __host__ int chh(void) {return 11;} // expected-note {{previous definition is here}} +extern "C" int chh(void) {return 11;} // expected-error {{redefinition of 'chh'}} + +// H/D overloading is OK +extern "C" __device__ int cdh(void) {return 10;} +extern "C" __host__ int cdh(void) {return 11;} + +// H/HD and D/HD overloading is not allowed. +extern "C" __host__ __device__ int chhd1(void) {return 12;} // expected-note {{previous definition is here}} +extern "C" __host__ int chhd1(void) {return 13;} // expected-error {{redefinition of 'chhd1'}} + +extern "C" __host__ int chhd2(void) {return 13;} // expected-note {{previous definition is here}} +extern "C" __host__ __device__ int chhd2(void) {return 12;} // expected-error {{redefinition of 'chhd2'}} +// expected-warning@-1 {{attribute declaration must precede definition}} +// expected-note@-3 {{previous definition is here}} + +// Helper functions to verify calling restrictions. +__device__ int d(void) { return 8; } +__host__ int h(void) { return 9; } +__global__ void g(void) {} +extern "C" __device__ int cd(void) {return 10;} +extern "C" __host__ int ch(void) {return 11;} + +__host__ void hostf(void) { + fp_t dp = d; + fp_t cdp = cd; +#if !defined(NOCHECKS) + // expected-error@-3 {{reference to __device__ function 'd' in __host__ function}} + // expected-note@65 {{'d' declared here}} + // expected-error@-4 {{reference to __device__ function 'cd' in __host__ function}} + // expected-note@68 {{'cd' declared here}} +#endif + fp_t hp = h; + fp_t chp = ch; + fp_t dhp = dh; + fp_t cdhp = cdh; + gp_t gp = g; + + d(); + cd(); +#if !defined(NOCHECKS) + // expected-error@-3 {{no matching function for call to 'd'}} + // expected-note@65 {{candidate function not viable: call to __device__ function from __host__ function}} + // expected-error@-4 {{no matching function for call to 'cd'}} + // expected-note@68 {{candidate function not viable: call to __device__ function from __host__ function}} +#endif + h(); + ch(); + dh(); + cdh(); + g(); // expected-error {{call to global function g not configured}} + g<<<0,0>>>(); +} + + +__device__ void devicef(void) { + fp_t dp = d; + fp_t cdp = cd; + fp_t hp = h; + fp_t chp = ch; +#if !defined(NOCHECKS) + // expected-error@-3 {{reference to __host__ function 'h' in __device__ function}} + // expected-note@66 {{'h' declared here}} + // expected-error@-4 {{reference to __host__ function 'ch' in __device__ function}} + // expected-note@69 {{'ch' declared here}} +#endif + fp_t dhp = dh; + fp_t cdhp = cdh; + gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}} + // expected-note@67 {{'g' declared here}} + + d(); + cd(); + h(); + ch(); +#if !defined(NOCHECKS) + // expected-error@-3 {{no matching function for call to 'h'}} + // expected-note@66 {{candidate function not viable: call to __host__ function from __device__ function}} + // expected-error@-4 {{no matching function for call to 'ch'}} + // expected-note@69 {{candidate function not viable: call to __host__ function from __device__ function}} +#endif + dh(); + cdh(); + g(); // expected-error {{no matching function for call to 'g'}} + // expected-note@67 {{candidate function not viable: call to __global__ function from __device__ function}} + g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __device__ function}} + // expected-note@67 {{'g' declared here}} +} + +__global__ void globalf(void) { + fp_t dp = d; + fp_t cdp = cd; + fp_t hp = h; + fp_t chp = ch; +#if !defined(NOCHECKS) + // expected-error@-3 {{reference to __host__ function 'h' in __global__ function}} + // expected-note@66 {{'h' declared here}} + // expected-error@-4 {{reference to __host__ function 'ch' in __global__ function}} + // expected-note@69 {{'ch' declared here}} +#endif + fp_t dhp = dh; + fp_t cdhp = cdh; + gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}} + // expected-note@67 {{'g' declared here}} + + d(); + cd(); + h(); + ch(); +#if !defined(NOCHECKS) + // expected-error@-3 {{no matching function for call to 'h'}} + // expected-note@66 {{candidate function not viable: call to __host__ function from __global__ function}} + // expected-error@-4 {{no matching function for call to 'ch'}} + // expected-note@69 {{candidate function not viable: call to __host__ function from __global__ function}} +#endif + dh(); + cdh(); + g(); // expected-error {{no matching function for call to 'g'}} + // expected-note@67 {{candidate function not viable: call to __global__ function from __global__ function}} + g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __global__ function}} + // expected-note@67 {{'g' declared here}} +} + +__host__ __device__ void hostdevicef(void) { + fp_t dp = d; + fp_t cdp = cd; + fp_t hp = h; + fp_t chp = ch; +#if !defined(NOCHECKS) +#if !defined(__CUDA_ARCH__) + // expected-error@-6 {{reference to __device__ function 'd' in __host__ __device__ function}} + // expected-note@65 {{'d' declared here}} + // expected-error@-7 {{reference to __device__ function 'cd' in __host__ __device__ function}} + // expected-note@68 {{'cd' declared here}} +#else + // expected-error@-9 {{reference to __host__ function 'h' in __host__ __device__ function}} + // expected-note@66 {{'h' declared here}} + // expected-error@-10 {{reference to __host__ function 'ch' in __host__ __device__ function}} + // expected-note@69 {{'ch' declared here}} +#endif +#endif + fp_t dhp = dh; + fp_t cdhp = cdh; + gp_t gp = g; +#if defined(__CUDA_ARCH__) + // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}} + // expected-note@67 {{'g' declared here}} +#endif + + d(); + cd(); + h(); + ch(); +#if !defined(NOCHECKS) +#if !defined(__CUDA_ARCH__) + // expected-error@-6 {{no matching function for call to 'd'}} + // expected-note@65 {{candidate function not viable: call to __device__ function from __host__ __device__ function}} + // expected-error@-7 {{no matching function for call to 'cd'}} + // expected-note@68 {{candidate function not viable: call to __device__ function from __host__ __device__ function}} +#else + // expected-error@-9 {{no matching function for call to 'h'}} + // expected-note@66 {{candidate function not viable: call to __host__ function from __host__ __device__ function}} + // expected-error@-10 {{no matching function for call to 'ch'}} + // expected-note@69 {{candidate function not viable: call to __host__ function from __host__ __device__ function}} +#endif +#endif + + dh(); + cdh(); + g(); + g<<<0,0>>>(); +#if !defined(__CUDA_ARCH__) + // expected-error@-3 {{call to global function g not configured}} +#else + // expected-error@-5 {{no matching function for call to 'g'}} + // expected-note@67 {{candidate function not viable: call to __global__ function from __host__ __device__ function}} + // expected-error@-6 {{reference to __global__ function 'g' in __host__ __device__ function}} + // expected-note@67 {{'g' declared here}} +#endif // __CUDA_ARCH__ +} + +// Test for address of overloaded function resolution in the global context. +fp_t hp = h; +fp_t chp = ch; +fp_t dhp = dh; +fp_t cdhp = cdh; +gp_t gp = g; + + +// Test overloading of destructors +// Can't mix H and unattributed destructors +struct d_h { + ~d_h() {} // expected-note {{previous declaration is here}} + __host__ ~d_h() {} // expected-error {{destructor cannot be redeclared}} +}; + +// H/D overloading is OK +struct d_dh { + __device__ ~d_dh() {} + __host__ ~d_dh() {} +}; + +// HD is OK +struct d_hd { + __host__ __device__ ~d_hd() {} +}; + +// Mixing H/D and HD is not allowed. +struct d_dhhd { + __device__ ~d_dhhd() {} + __host__ ~d_dhhd() {} // expected-note {{previous declaration is here}} + __host__ __device__ ~d_dhhd() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct d_hhd { + __host__ ~d_hhd() {} // expected-note {{previous declaration is here}} + __host__ __device__ ~d_hhd() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct d_hdh { + __host__ __device__ ~d_hdh() {} // expected-note {{previous declaration is here}} + __host__ ~d_hdh() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct d_dhd { + __device__ ~d_dhd() {} // expected-note {{previous declaration is here}} + __host__ __device__ ~d_dhd() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct d_hdd { + __host__ __device__ ~d_hdd() {} // expected-note {{previous declaration is here}} + __device__ ~d_hdd() {} // expected-error {{destructor cannot be redeclared}} +}; + +// Test overloading of member functions +struct m_h { + void operator delete(void *ptr); // expected-note {{previous declaration is here}} + __host__ void operator delete(void *ptr); // expected-error {{class member cannot be redeclared}} +}; + +// D/H overloading is OK +struct m_dh { + __device__ void operator delete(void *ptr); + __host__ void operator delete(void *ptr); +}; + +// HD by itself is OK +struct m_hd { + __device__ __host__ void operator delete(void *ptr); +}; + +struct m_hhd { + __host__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} + __host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} +}; + +struct m_hdh { + __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} + __host__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} +}; + +struct m_dhd { + __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} + __host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} +}; + +struct m_hdd { + __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} + __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} +};