Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -166,6 +166,7 @@ LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device") LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions") LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)") +LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -659,6 +659,8 @@ HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">; def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">, HelpText<"Incorporate CUDA device-side binary into host object file.">; +def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">, + HelpText<"Enable function overloads based on CUDA target attributes.">; } // let Flags = [CC1Option] Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -8600,8 +8600,37 @@ CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D); + enum CUDAFunctionPreference { + CFP_Never, // Invalid caller/callee combination. + CFP_LastResort, // Lowest priority. Only in effect if + // LangOpts.CUDADisableTargetCallChecks is true. 
+    CFP_Fallback,   // Low priority caller/callee combination
+    CFP_Best,       // Preferred caller/callee combination
+  };
+
+  /// Identifies relative preference of a given Caller/Callee
+  /// combination, based on their host/device attributes.
+  /// \param Caller function which needs address of \p Callee.
+  ///               nullptr in case of global context.
+  /// \param Callee target function
+  ///
+  /// \returns preference value for particular Caller/Callee combination.
+  CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
+                                                const FunctionDecl *Callee);
+
   bool CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee);
 
+  /// Finds a function in \p Matches with highest calling priority
+  /// from \p Caller context and erases all functions with lower
+  /// calling priority.
+  void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
+                                SmallVectorImpl<FunctionDecl *> &Matches);
+  void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
+                                SmallVectorImpl<DeclAccessPair> &Matches);
+  void EraseUnwantedCUDAMatches(
+      const FunctionDecl *Caller,
+      SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
+
   /// Given a implicit special member, infer its CUDA target from the
   /// calls it needs to make to underlying base/field special members.
   /// \param ClassDecl the class for which the member is being created.
Index: lib/AST/ItaniumMangle.cpp
===================================================================
--- lib/AST/ItaniumMangle.cpp
+++ lib/AST/ItaniumMangle.cpp
@@ -479,6 +479,13 @@
   if (!Context.shouldMangleDeclName(FD))
     return;
 
+  // __host__ __device__ functions co-exist with both __host__ and
+  // __device__ functions, so they need a different mangled name.
+  // Attributes are supposed to appear in alphabetic order, so we need
+  // to put 'enable_if' in-between 'device' and 'host'.
+  if (FD->hasAttr<CUDADeviceAttr>() && FD->hasAttr<CUDAHostAttr>())
+    Out << "Ua6device";
+
   if (FD->hasAttr<EnableIfAttr>()) {
     FunctionTypeDepthState Saved = FunctionTypeDepth.push();
     Out << "Ua9enable_ifI";
@@ -498,6 +505,9 @@
     FunctionTypeDepth.pop(Saved);
   }
 
+  if (FD->hasAttr<CUDADeviceAttr>() && FD->hasAttr<CUDAHostAttr>())
+    Out << "Ua4host";
+
   // Whether the mangling of a function type includes the return type depends on
   // the context and the nature of the function. The rules for deciding whether
   // the return type is included are:
@@ -4121,4 +4131,3 @@
 ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) {
   return new ItaniumMangleContextImpl(Context, Diags);
 }
-
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1412,6 +1412,9 @@
   if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
     Opts.CUDADisableTargetCallChecks = 1;
 
+  if (Args.hasArg(OPT_fcuda_target_overloads))
+    Opts.CUDATargetOverloads = 1;
+
   if (Opts.ObjC1) {
     if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
       StringRef value = arg->getValue();
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -60,8 +60,101 @@
   return CFT_Host;
 }
 
+// * CUDA Call preference table
+//
+// F - from,
+// T - to
+// Ph - preference in host mode
+// Pd - preference in device mode
+// H  - handled in (x)
+// Preferences: b-best, f-fallback, l-last resort, n-never.
+// +// | F | T | Ph | Pd | H | +// |----+----+----+----+-----+ +// | d | d | b | b | (b) | +// | d | g | n | n | (a) | +// | d | h | l | l | (e) | +// | d | hd | f | f | (c) | +// | g | d | b | b | (b) | +// | g | g | n | n | (a) | +// | g | h | l | l | (e) | +// | g | hd | f | f | (c) | +// | h | d | l | l | (e) | +// | h | g | b | b | (b) | +// | h | h | b | b | (b) | +// | h | hd | f | f | (c) | +// | hd | d | l | f | (d) | +// | hd | g | f | n |(d/a)| +// | hd | h | f | l | (d) | +// | hd | hd | b | b | (b) | + +Sema::CUDAFunctionPreference +Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, + const FunctionDecl *Callee) { + assert(getLangOpts().CUDATargetOverloads && + "Should not be called w/o enabled target overloads."); + + assert(Callee && "Callee must be valid."); + CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); + CUDAFunctionTarget CallerTarget = + (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host; + + // If one of the targets is invalid, the check always fails, no matter what + // the other target is. + if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) + return CFP_Never; + + // (a) Can't call global from some contexts until we support CUDA's + // dynamic parallelism. + if (CalleeTarget == CFT_Global && + (CallerTarget == CFT_Global || CallerTarget == CFT_Device || + (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice))) + return CFP_Never; + + // (b) Best case scenarios + if (CalleeTarget == CallerTarget || + (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || + (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) + return CFP_Best; + + // (c) Calling HostDevice is OK as a fallback that works for everyone. + if (CalleeTarget == CFT_HostDevice) + return CFP_Fallback; + + // Figure out what should be returned 'last resort' cases. Normally + // those would not be allowed, but we'll consider them if + // CUDADisableTargetCallChecks is true. 
+  CUDAFunctionPreference QuestionableResult =
+      getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
+
+  // (d) HostDevice behavior depends on compilation mode.
+  if (CallerTarget == CFT_HostDevice) {
+    // Calling a function that matches compilation mode is OK.
+    // Calling a function from the other side is frowned upon.
+    if (getLangOpts().CUDAIsDevice)
+      return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
+    else
+      return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
+                 ? CFP_Fallback
+                 : QuestionableResult;
+  }
+
+  // (e) Calling across device/host boundary is not something you should do.
+  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
+      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
+      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
+    return QuestionableResult;
+
+  llvm_unreachable("All cases should've been handled by now.");
+}
+
 bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
                            const FunctionDecl *Callee) {
+  // With target overloads enabled, we only disallow calling
+  // combinations with CFP_Never.
+  if (getLangOpts().CUDATargetOverloads)
+    return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
+
   // The CUDADisableTargetCallChecks short-circuits this check: we assume all
   // cross-target calls are valid.
   if (getLangOpts().CUDADisableTargetCallChecks)
@@ -117,6 +210,57 @@
   return false;
 }
 
+template <typename T, typename FetchDeclFn>
+static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
+                                         llvm::SmallVectorImpl<T> &Matches,
+                                         FetchDeclFn FetchDecl) {
+  assert(S.getLangOpts().CUDATargetOverloads &&
+         "Should not be called w/o enabled target overloads.");
+  if (Matches.size() <= 1)
+    return;
+
+  // Find the best call preference among the functions in Matches.
+  Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
+  for (auto const &Match : Matches) {
+    P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
+    if (P > BestCFP)
+      BestCFP = P;
+  }
+
+  // Erase all functions with lower priority.
+  for (unsigned I = 0, N = Matches.size(); I != N;)
+    if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
+      Matches[I] = Matches[--N];
+      Matches.resize(N);
+    } else {
+      ++I;
+    }
+}
+
+void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
+                                    SmallVectorImpl<FunctionDecl *> &Matches){
+  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
+      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
+}
+
+void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
+                                    SmallVectorImpl<DeclAccessPair> &Matches) {
+  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
+      *this, Caller, Matches, [](const DeclAccessPair &item) {
+        return dyn_cast<FunctionDecl>(item.getDecl());
+      });
+}
+
+void Sema::EraseUnwantedCUDAMatches(
+    const FunctionDecl *Caller,
+    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
+  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
+      *this, Caller, Matches,
+      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
+        return dyn_cast<FunctionDecl>(item.second);
+      });
+}
+
 /// When an implicitly-declared special member has to invoke more than one
 /// base/field special member, conflicts may occur in the targets of these
 /// members. For example, if one base's member __host__ and another's is
Index: lib/Sema/SemaDecl.cpp
===================================================================
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -5510,6 +5510,12 @@
     // In C++, the overloadable attribute negates the effects of extern "C".
     if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
       return false;
+
+    // So do CUDA's host/device attributes if overloading is enabled.
+    if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+        (D->template hasAttr<CUDADeviceAttr>() ||
+         D->template hasAttr<CUDAHostAttr>()))
+      return false;
   }
   return D->isExternC();
 }
Index: lib/Sema/SemaExprCXX.cpp
===================================================================
--- lib/Sema/SemaExprCXX.cpp
+++ lib/Sema/SemaExprCXX.cpp
@@ -2256,6 +2256,9 @@
            "found an unexpected usual deallocation function");
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+    EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
+
   assert(Matches.size() == 1 &&
          "unexpectedly have multiple usual deallocation functions");
   return Matches.front();
@@ -2287,6 +2290,9 @@
       Matches.push_back(F.getPair());
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+    EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
+
   // There's exactly one suitable operator; pick it.
   if (Matches.size() == 1) {
     Operator = cast<FunctionDecl>(Matches[0]->getUnderlyingDecl());
Index: lib/Sema/SemaOverload.cpp
===================================================================
--- lib/Sema/SemaOverload.cpp
+++ lib/Sema/SemaOverload.cpp
@@ -1067,6 +1067,25 @@
     return true;
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
+    CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
+                       OldTarget = IdentifyCUDATarget(Old);
+    if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
+      return false;
+
+    assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target.");
+
+    // Codegen expects unique destructor, so we don't allow HD
+    // destructors if we already have one with different target.
+    if (isa<CXXDestructorDecl>(New) &&
+        ((NewTarget == CFT_HostDevice) != (OldTarget == CFT_HostDevice)))
+      return false;
+
+    // Allow overloading of functions with same signature, but
+    // different CUDA target attributes.
+    return NewTarget != OldTarget;
+  }
+
   // The signatures match; this is not an overload.
   return false;
 }
 
@@ -8503,6 +8522,13 @@
     return true;
   }
 
+  if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+      Cand1.Function && Cand2.Function) {
+    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+           S.IdentifyCUDAPreference(Caller, Cand2.Function);
+  }
+
   return false;
 }
 
@@ -9920,6 +9946,10 @@
         EliminateAllExceptMostSpecializedTemplate();
       }
     }
+
+    if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
+        Matches.size() > 1)
+      EliminateSuboptimalCudaMatches();
   }
 
 private:
@@ -10095,11 +10125,15 @@
         ++I;
       else {
         Matches[I] = Matches[--N];
-        Matches.set_size(N);
+        Matches.resize(N);
       }
     }
   }
 
+  void EliminateSuboptimalCudaMatches() {
+    S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches);
+  }
+
 public:
   void ComplainNoMatchesFound() const {
     assert(Matches.empty());
Index: test/CodeGenCUDA/function-overload.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/function-overload.cu
@@ -0,0 +1,273 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// Make sure we handle target overloads correctly.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:    -fcuda-target-overloads -emit-llvm -o - %s \
+// RUN:    | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
+// RUN:    -fcuda-target-overloads -emit-llvm -o - %s \
+// RUN:    | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
+
+// Check target overloads handling with disabled call target checks.
+// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \ +// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s +// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \ +// RUN: -fcuda-is-device -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \ +// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s + +#include "Inputs/cuda.h" + +typedef int (*fp_t)(void); +typedef void (*gp_t)(void); + +// CHECK-HOST: @dhddp = global {{.*}} @_Z4dhhdv +// CHECK-HOST: @hhdp = global {{.*}} @_Z3hhdv +// CHECK-HOST: @dhdp = global {{.*}} @_Z3dhdUa6deviceUa4hostv +// CHECK-HOST: @gp = global {{.*}} @_Z1gv +// CHECK-HOST: @hp = global {{.*}} @_Z1hv + +// Check proper mangling if we mix with enable_if attribute +__device__ __host__ __attribute__((enable_if(1, ""))) void hde() {} +// CHECK-BOTH-LABEL: define void @_Z3hdeUa6deviceUa9enable_ifIXLi1EEEUa4hostv + +// CHECK-BOTH-LABEL: define i32 @_Z4dhhdv() +__device__ int dhhd(void) { return 1; } +// CHECK-DEVICE: ret i32 1 + +__host__ int dhhd(void) { return 2; } +// CHECK-HOST: ret i32 2 + +// CHECK-BOTH-LABEL: define i32 @_Z4dhhdUa6deviceUa4hostv() +__host__ __device__ int dhhd(void) { return 3; } +// CHECK-BOTH: ret i32 3 + +// CHECK-HOST-LABEL: define i32 @_Z3hhdv() +__host__ int hhd(void) { return 4; } +// CHECK-HOST: ret i32 4 + +// CHECK-BOTH-LABEL: define i32 @_Z3dhdUa6deviceUa4hostv() +__host__ __device__ int dhd(void) { return 5; } +// CHECK-BOTH: ret i32 5 + +// CHECK-DEVICE-LABEL: define i32 @_Z3dhdv() +__device__ int dhd(void) { return 6; } +// CHECK-DEVICE: ret i32 6 + +// CHECK-BOTH-LABEL: define i32 @_Z3hhdUa6deviceUa4hostv() +__host__ __device__ int hhd(void) { return 7; } +// CHECK-BOTH: ret i32 7 + +// 
CHECK-DEVICE-LABEL: define i32 @_Z1dv() +__device__ int d(void) { return 8; } +// CHECK-DEVICE: ret i32 8 + +// CHECK-HOST-LABEL: define i32 @_Z1hv() +__host__ int h(void) { return 9; } +// CHECK-HOST: ret i32 9 + +// CHECK-BOTH-LABEL: define void @_Z1gv() +__global__ void g(void) {} +// CHECK-BOTH: ret void + +// mangled names of extern "C" __host__ __device__ functions clash +// with those of their __host__/__device__ counterparts, so +// overloading of extern "C" functions can only happen for __host__ +// and __device__ functions -- we never codegen them in the same +// compilation and therefore mangled name conflict is not a problem. + +// CHECK-DEVICE-LABEL: define i32 @chd() +extern "C" __device__ int chd(void) {return 10;} +// CHECK-DEVICE: ret i32 10 + +// CHECK-HOST-LABEL: define i32 @chd() +extern "C" __host__ int chd(void) {return 11;} +// CHECK-HOST: ret i32 11 + + +// CHECK-HOST-LABEL: define void @_Z5hostfv() +__host__ void hostf(void) { + fp_t dhddp = dhhd; // CHECK-HOST: store {{.*}} @_Z4dhhdv, {{.*}} %dhddp, + fp_t hhdp = hhd; // CHECK-HOST: store {{.*}} @_Z3hhdv, {{.*}} %hhdp, + fp_t dhdp = dhd; // CHECK-HOST: store {{.*}} @_Z3dhdUa6deviceUa4hostv, {{.*}} %dhdp, +#if defined (NOCHECKS) + fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp, +#endif + fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp, + fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp, + + dhhd(); // CHECK-HOST: call i32 @_Z4dhhdv() + hhd(); // CHECK-HOST: call i32 @_Z3hhdv() + dhd(); // CHECK-HOST: call i32 @_Z3dhdUa6deviceUa4hostv() + h(); // CHECK-HOST: call i32 @_Z1hv() + chd(); // CHECK-HOST: call i32 @chd() +} + +// CHECK-DEVICE-LABEL: define void @_Z7devicefv() +__device__ void devicef(void) { + fp_t dhddp = dhhd; // CHECK-DEVICE: store {{.*}} @_Z4dhhdv, {{.*}} %dhddp, + fp_t hhdp = hhd; // CHECK-DEVICE: store {{.*}} @_Z3hhdUa6deviceUa4hostv, {{.*}} %hhdp, + fp_t dhdp = dhd; // CHECK-DEVICE: store {{.*}} @_Z3dhdv, {{.*}} %dhdp, + fp_t dp = 
d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp, +#if defined (NOCHECKS) + fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp, +#endif + fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp, + + dhhd(); // CHECK-DEVICE: call i32 @_Z4dhhdv() + hhd(); // CHECK-DEVICE: call i32 @_Z3hhdUa6deviceUa4hostv() + dhd(); // CHECK-DEVICE: call i32 @_Z3dhdv() + d(); // CHECK-DEVICE: call i32 @_Z1dv() + chd(); // CHECK-DEVICE: call i32 @chd() +} + +// CHECK-BOTH-LABEL: define void @_Z11hostdevicefUa6deviceUa4hostv() +__host__ __device__ void hostdevicef(void) { + fp_t dhddp = dhhd; // CHECK-BOTH: store {{.*}} @_Z4dhhdUa6deviceUa4hostv, {{.*}} %dhddp, + fp_t hhdp = hhd; // CHECK-BOTH: store {{.*}} @_Z3hhdUa6deviceUa4hostv, {{.*}} %hhdp, + fp_t dhdp = dhd; // CHECK-BOTH: store {{.*}} @_Z3dhdUa6deviceUa4hostv, {{.*}} %dhdp, +#if defined (NOCHECKS) + fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp, + fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp, +#endif + // chd would be __host__ or __device__ depending on compilation mode. + fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp, + + dhhd(); // CHECK-BOTH: call i32 @_Z4dhhdUa6deviceUa4hostv() + hhd(); // CHECK-BOTH: call i32 @_Z3hhdUa6deviceUa4hostv() + dhd(); // CHECK-BOTH: call i32 @_Z3dhdUa6deviceUa4hostv() + +#if defined(NOCHECKS) || defined(__CUDA_ARCH__) + d(); // CHECK-BOTH-NC: call i32 @_Z1dv() +#endif +#if defined(NOCHECKS) || !defined(__CUDA_ARCH__) + h(); // CHECK-BOTH-NC: call i32 @_Z1hv() +#endif + chd(); // CHECK-BOTH: call i32 @chd() +} + +// Test for address of overloaded function resolution in the global context. 
+fp_t dhddp = dhhd; +fp_t hhdp = hhd; +fp_t dhdp = dhd; +gp_t gp = g; +fp_t hp = h; + +int x; +// Check constructors/destructors for D/H functions +struct s_cd_dh { + __host__ s_cd_dh() { x = 11; } + __device__ s_cd_dh() { x = 12; } + __host__ __device__ s_cd_dh() { x = 13; } + __host__ ~s_cd_dh() { x = 21; } + __device__ ~s_cd_dh() { x = 22; } + // no HD as it is not allowed to overload H/D for destructors. +}; + +struct s_cd_hd { + __host__ __device__ s_cd_hd() { x = 31; } + __host__ __device__ ~s_cd_hd() { x = 32; } +}; + +// Check overloading of (de)allocators for D/H/HD functions +struct s_da_dhhd { + int placeholder; + + __host__ void *operator new(size_t count) { x = 41; return &x; } + __device__ void *operator new(size_t count) { x = 42; return &x; } + __host__ __device__ void *operator new(size_t count) { x=43; return &x; } + + __host__ void operator delete(void *ptr) { x = 44; } + __device__ void operator delete(void *ptr) { x = 45; } + __host__ __device__ void operator delete(void *ptr) { x = 46; } +}; + +// Check overloading of (de)allocators for D/HD functions +struct s_da_dhd { + int placeholder; + + __host__ void *operator new(size_t count) { x = 51; return &x; } + __host__ __device__ void *operator new(size_t count) { x=52; return &x; } + + __host__ void operator delete(void *ptr) { x = 53; } + __host__ __device__ void operator delete(void *ptr) { x = 54; } +}; + +// CHECK-BOTH: define void @_Z7wrapperv +#if defined(__CUDA_ARCH__) +__device__ +#else +__host__ +#endif +void wrapper() { + s_da_dhhd *sdap = new s_da_dhhd; + // CHECK-BOTH: call i8* @_ZN9s_da_dhhdnwEm( + delete sdap; + // CHECK-BOTH: call void @_ZN9s_da_dhhddlEPv( + + s_da_dhd *sdadhdp = new s_da_dhd; + // CHECK-HOST: call i8* @_ZN8s_da_dhdnwEm( + // CHECK-DEVICE: call i8* @_ZN8s_da_dhdnwEUa6deviceUa4hostm( + delete sdadhdp; + // CHECK-HOST: call void @_ZN8s_da_dhddlEPv( + // CHECK-DEVICE: call void @_ZN8s_da_dhddlEUa6deviceUa4hostPv( + + s_cd_dh scddh; + // CHECK-BOTH: call void 
@_ZN7s_cd_dhC1Ev( + s_cd_hd scdhd; + // CHECK-BOTH: call void @_ZN7s_cd_hdC1EUa6deviceUa4hostv + + // CHECK-BOTH: call void @_ZN7s_cd_hdD1EUa6deviceUa4hostv( + // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev( +} +// CHECK-BOTH: ret void + +// Now it's time to check what's been generated for the methods we used. + +// void *operator new(size_t count); +// CHECK-BOTH: define linkonce_odr i8* @_ZN9s_da_dhhdnwEm( +// CHECK-HOST: store i32 41, +// CHECK-DEVICE: store i32 42, +// CHECK-BOTH: } + +// void operator delete(void *ptr); +// CHECK-BOTH: define linkonce_odr void @_ZN9s_da_dhhddlEPv( +// CHECK-HOST: store i32 44, +// CHECK-DEVICE: store i32 45, +// CHECK-BOTH: } + +// void *operator new(size_t count); +// CHECK-HOST: define linkonce_odr i8* @_ZN8s_da_dhdnwEm( +// CHECK-DEVICE: define linkonce_odr i8* @_ZN8s_da_dhdnwEUa6deviceUa4hostm( +// CHECK-HOST: store i32 51, +// CHECK-DEVICE: store i32 52, +// CHECK-BOTH: } + +// void operator delete(void *ptr); +// CHECK-HOST: define linkonce_odr void @_ZN8s_da_dhddlEPv( +// CHECK-DEVICE: define linkonce_odr void @_ZN8s_da_dhddlEUa6deviceUa4hostPv( +// CHECK-HOST: store i32 53, +// CHECK-DEVICE: store i32 54, +// CHECK-BOTH: } + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev( +// CHECK-HOST: store i32 11, +// CHECK-DEVICE: store i32 12, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2EUa6deviceUa4hostv( +// CHECK-BOTH: store i32 31, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2EUa6deviceUa4hostv( +// CHECK-BOTH: store i32 32, +// CHECK-BOTH: ret void + +// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev( +// CHECK-HOST: store i32 21, +// CHECK-DEVICE: store i32 22, +// CHECK-BOTH: ret void Index: test/SemaCUDA/function-overload.cu =================================================================== --- /dev/null +++ test/SemaCUDA/function-overload.cu @@ -0,0 +1,241 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + 
+// Make sure we handle target overloads correctly. +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -fsyntax-only -fcuda-target-overloads -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ +// RUN: -fsyntax-only -fcuda-target-overloads -fcuda-is-device -verify %s + +// Check target overloads handling with disabled call target checks. +// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -verify %s +// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \ +// RUN: -fcuda-is-device -verify %s + +#include "Inputs/cuda.h" + +typedef int (*fp_t)(void); +typedef void (*gp_t)(void); + +__device__ int dhhd(void) { return 2; } +__host__ int dhhd(void) { return 1; } // expected-note {{previous definition is here}} +int dhhd(void) { return 1; } // expected-error {{redefinition of 'dhhd'}} +__host__ __device__ int dhhd(void) { return 3; } + +__host__ int hhd(void) { return 4; } +__host__ __device__ int dhd(void) { return 5; } + +__device__ int dhd(void) { return 6; } +__host__ __device__ int hhd(void) { return 7; } + +__device__ int d(void) { return 8; } +__host__ int h(void) { return 9; } +__global__ void g(void) {} + +extern "C" __device__ int chd(void) {return 10;} +extern "C" __host__ int chd(void) {return 11;} // expected-note {{previous definition is here}} +extern "C" int chd(void) {return 11;} // expected-error {{redefinition of 'chd'}} +extern "C" __host__ __device__ int chd(void) {return 12;} // expected-note {{previous definition is here}} +extern "C" __host__ __device__ int chd(void) {return 13;} // expected-error {{redefinition of 'chd'}} + +__host__ void hostf(void) { + fp_t dhddp = dhhd; + fp_t hhdp = hhd; + fp_t dhdp = dhd; + gp_t gp = g; + fp_t dp = d; +#if !defined(NOCHECKS) + // expected-error@-2 {{reference to __device__ function 'd' in __host__ function}} 
+ // expected-note@33 {{'d' declared here}} +#endif + fp_t hp = h; + + dhhd(); + hhd(); + dhd(); + chd(); + d(); +#if !defined(NOCHECKS) + // expected-error@-2 {{no matching function for call to 'd'}} + // expected-note@33 {{candidate function not viable: call to __device__ function from __host__ function}} +#endif + h(); + + g(); // expected-error {{call to global function g not configured}} + g<<<0,0>>>(); +} + +__device__ void devicef(void) { + fp_t dhddp = dhhd; + fp_t hhdp = hhd; + fp_t dhdp = dhd; + gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}} + // expected-note@35 {{'g' declared here}} + fp_t dp = d; + fp_t hp = h; +#if !defined(NOCHECKS) + // expected-error@-2 {{reference to __host__ function 'h' in __device__ function}} + // expected-note@34 {{'h' declared here}} +#endif + + dhhd(); + hhd(); + dhd(); + chd(); + d(); + h(); + g(); +#if !defined(NOCHECKS) + // expected-error@-3 {{no matching function for call to 'h'}} + // expected-note@34 {{candidate function not viable: call to __host__ function from __device__ function}} +#endif + // expected-error@-5 {{no matching function for call to 'g'}} + // expected-note@35 {{candidate function not viable: call to __global__ function from __device__ function}} + g<<<0,0>>>(); + // expected-error@-1 {{reference to __global__ function 'g' in __device__ function}} + // expected-note@35 {{'g' declared here}} +} + +__global__ void globalf(void) { + fp_t dhddp = dhhd; + fp_t hhdp = hhd; + fp_t dhdp = dhd; + gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}} + // expected-note@35 {{'g' declared here}} + fp_t dp = d; + fp_t hp = h; +#if !defined(NOCHECKS) + // expected-error@-2 {{reference to __host__ function 'h' in __global__ function}} + // expected-note@34 {{'h' declared here}} +#endif + + dhhd(); + hhd(); + dhd(); + chd(); + d(); + h(); +#if !defined(NOCHECKS) + // expected-error@-2 {{no matching function for call to 'h'}} + 
// expected-note@34 {{candidate function not viable: call to __host__ function from __global__ function}} +#endif + g(); + // expected-error@-1 {{no matching function for call to 'g'}} + // expected-note@35 {{candidate function not viable: call to __global__ function from __global__ function}} + g<<<0,0>>>(); + // expected-error@-1 {{reference to __global__ function 'g' in __global__ function}} + // expected-note@35 {{'g' declared here}} + +} + +__host__ __device__ void hostdevicef(void) { + fp_t dhddp = dhhd; + fp_t hhdp = hhd; + fp_t dhdp = dhd; + gp_t gp = g; +#if defined(__CUDA_ARCH__) + // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}} + // expected-note@35 {{'g' declared here}} +#endif + fp_t dp = d; + fp_t hp = h; +#if !defined(NOCHECKS) +#if !defined(__CUDA_ARCH__) + // expected-error@-4 {{reference to __device__ function 'd' in __host__ __device__ function}} + // expected-note@33 {{'d' declared here}} +#else + // expected-error@-6 {{reference to __host__ function 'h' in __host__ __device__ function}} + // expected-note@34 {{'h' declared here}} +#endif +#endif + + dhhd(); + hhd(); + dhd(); + chd(); + + d(); + h(); + g(); + g<<<0,0>>>(); +#if !defined(__CUDA_ARCH__) +#if !defined(NOCHECKS) + // expected-error@-6 {{no matching function for call to 'd'}} + // expected-note@33 {{candidate function not viable: call to __device__ function from __host__ __device__ function}} +#endif + // expected-error@-7 {{call to global function g not configured}} +#else +#if !defined(NOCHECKS) + // expected-error@-11 {{no matching function for call to 'h'}} + // expected-note@34 {{candidate function not viable: call to __host__ function from __host__ __device__ function}} +#endif + // expected-error@-13 {{no matching function for call to 'g'}} + // expected-note@35 {{candidate function not viable: call to __global__ function from __host__ __device__ function}} + // expected-error@-14 {{reference to __global__ function 'g' in __host__ 
__device__ function}} + // expected-note@35 {{'g' declared here}} +#endif // __CUDA_ARCH__ +} + +// Test for address of overloaded function resolution in the global context. +fp_t dhddp = dhhd; +fp_t hhdp = hhd; +fp_t dhdp = dhd; +gp_t gp = g; +fp_t hp = h; + +// Test overloading of destructors. We need to ensure there's only one +// destructor present for codegen. + +// We codegen only one of these destructors, so there's no ambiguity +struct s0 { + __host__ ~s0() {} + __device__ ~s0() {} +}; + +// __host__ __device__ destructor by itself is fine, too. +struct shd { + __host__ __device__ ~shd() {} +}; + +// __host__ __device__ destructor can't overload __host__ or +// __device__ destructors because we'll codegen both of them. +struct s1 { + __host__ ~s1() {} + __device__ ~s1() {} // expected-note {{previous declaration is here}} + __host__ __device__ ~s1() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct s2h { + __host__ ~s2h() {} // expected-note {{previous declaration is here}} + __host__ __device__ ~s2h() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct s3h { + __host__ __device__ ~s3h() {} // expected-note {{previous declaration is here}} + __host__ ~s3h() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct s2d { + __device__ ~s2d() {} // expected-note {{previous declaration is here}} + __host__ __device__ ~s2d() {} // expected-error {{destructor cannot be redeclared}} +}; + +struct s3d { + __host__ __device__ ~s3d() {} // expected-note {{previous declaration is here}} + __device__ ~s3d() {} // expected-error {{destructor cannot be redeclared}} +}; + +// Test (de)allocation functions + +struct a0 { + void operator delete(void *ptr); // expected-note {{previous declaration is here}} + __host__ void operator delete(void *ptr); // expected-error {{class member cannot be redeclared}} + void operator delete[](void *ptr); // expected-note {{previous declaration is here}} + __host__ void operator 
delete[](void *ptr); // expected-error {{class member cannot be redeclared}} + __device__ void operator delete(void *ptr); + __device__ void operator delete[](void *ptr); + __host__ __device__ void operator delete(void *ptr); + __host__ __device__ void operator delete[](void *ptr); +};