Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -8794,12 +8794,18 @@ CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D); + // CUDA function call preference. Must be ordered numerically from + // worst to best. enum CUDAFunctionPreference { CFP_Never, // Invalid caller/callee combination. - CFP_LastResort, // Lowest priority. Only in effect if + CFP_WrongSide, // Calls from host-device to host or device + // function that do not match current compilation + // mode. Only in effect if // LangOpts.CUDADisableTargetCallChecks is true. - CFP_Fallback, // Low priority caller/callee combination - CFP_Best, // Preferred caller/callee combination + CFP_HostDevice, // Any calls to host/device functions. + CFP_SameSide, // Calls from host-device to host or device + // function matching current compilation mode. + CFP_Native, // host-to-host or device-to-device calls. }; /// Identifies relative preference of a given Caller/Callee Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -68,26 +68,26 @@ // Ph - preference in host mode // Pd - preference in device mode // H - handled in (x) -// Preferences: b-best, f-fallback, l-last resort, n-never. +// Preferences: N:native, HD:host-device, SS:same side, WS:wrong side, --:never. // -// | F | T | Ph | Pd | H | -// |----+----+----+----+-----+ -// | d | d | b | b | (b) | -// | d | g | n | n | (a) | -// | d | h | l | l | (e) | -// | d | hd | f | f | (c) | -// | g | d | b | b | (b) | -// | g | g | n | n | (a) | -// | g | h | l | l | (e) | -// | g | hd | f | f | (c) | -// | h | d | l | l | (e) | -// | h | g | b | b | (b) | -// | h | h | b | b | (b) | -// | h | hd | f | f | (c) | -// | hd | d | l | f | (d) | -// | hd | g | f | n |(d/a)| -// | hd | h | f | l | (d) | -// | hd | hd | b | b | (b) | +// | F | T | Ph | Pd | H | +// |----+----+-----+-----+-----+ +// | d | d | N | N | (c) | +// | d | g | -- | -- | (a) | +// | d | h | -- | -- | (e) | +// | d | hd | HD | HD | (b) | +// | g | d | N | N | (c) | +// | g | g | -- | -- | (a) | +// | g | h | -- | -- | (e) | +// | g | hd | HD | HD | (b) | +// | h | d | -- | -- | (e) | +// | h | g | N | N | (c) | +// | h | h | N | N | (c) | +// | h | hd | HD | HD | (b) | +// | hd | d | WS | SS | (d) | +// | hd | g | SS | -- |(d/a)| +// | hd | h | SS | WS | (d) | +// | hd | hd | HD | HD | (b) | Sema::CUDAFunctionPreference Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, @@ -112,39 +112,38 @@ (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice))) return CFP_Never; - // (b) Best case scenarios + // (b) Calling HostDevice is OK for everyone. + if (CalleeTarget == CFT_HostDevice) + return CFP_HostDevice; + + // (c) Best case scenarios if (CalleeTarget == CallerTarget || (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) - return CFP_Best; - - // (c) Calling HostDevice is OK as a fallback that works for everyone. - if (CalleeTarget == CFT_HostDevice) - return CFP_Fallback; - - // Figure out what should be returned 'last resort' cases. Normally - // those would not be allowed, but we'll consider them if - // CUDADisableTargetCallChecks is true. - CUDAFunctionPreference QuestionableResult = - getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never; + return CFP_Native; // (d) HostDevice behavior depends on compilation mode. if (CallerTarget == CFT_HostDevice) { - // Calling a function that matches compilation mode is OK. - // Calling a function from the other side is frowned upon. - if (getLangOpts().CUDAIsDevice) - return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult; - else - return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global) - ? CFP_Fallback - : QuestionableResult; + // It's OK to call a compilation-mode matching function from an HD one. + if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || + (!getLangOpts().CUDAIsDevice && + (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) + return CFP_SameSide; + + // We'll allow calls to non-mode-matching functions if target call + // checks are disabled. This is needed to avoid complaining about + // HD->H calls when we compile for device side and vice versa. + if (getLangOpts().CUDADisableTargetCallChecks) + return CFP_WrongSide; + + return CFP_Never; } // (e) Calling across device/host boundary is not something you should do. if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) - return QuestionableResult; + return CFP_Never; llvm_unreachable("All cases should've been handled by now."); } Index: cfe/trunk/lib/Sema/SemaOverload.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaOverload.cpp +++ cfe/trunk/lib/Sema/SemaOverload.cpp @@ -8722,14 +8722,44 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, iterator &Best, bool UserDefinedConversion) { + llvm::SmallVector Candidates; + std::transform(begin(), end(), std::back_inserter(Candidates), + [](OverloadCandidate &Cand) { return &Cand; }); + + // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA + // but accepted by both clang and NVCC. However during a particular + // compilation mode only one call variant is viable. We need to + // exclude non-viable overload candidates from consideration based + // only on their host/device attributes. Specifically, if one + // candidate call is WrongSide and the other is SameSide, we ignore + // the WrongSide candidate. + if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads) { + const FunctionDecl *Caller = dyn_cast(S.CurContext); + bool ContainsSameSideCandidate = + llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { + return Cand->Function && + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_SameSide; + }); + if (ContainsSameSideCandidate) { + auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { + return Cand->Function && + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_WrongSide; + }; + Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(), + IsWrongSideCandidate), + Candidates.end()); + } + } + // Find the best viable function. Best = end(); - for (iterator Cand = begin(); Cand != end(); ++Cand) { + for (auto *Cand : Candidates) if (Cand->Viable) if (Best == end() || isBetterOverloadCandidate(S, *Cand, *Best, Loc, UserDefinedConversion)) Best = Cand; - } // If we didn't find any viable functions, abort. if (Best == end()) @@ -8739,7 +8769,7 @@ // Make sure that this function is better than every other viable // function. If not, we have an ambiguity. - for (iterator Cand = begin(); Cand != end(); ++Cand) { + for (auto *Cand : Candidates) { if (Cand->Viable && Cand != Best && !isBetterOverloadCandidate(S, *Best, *Cand, Loc, Index: cfe/trunk/test/CodeGenCUDA/function-overload.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/function-overload.cu +++ cfe/trunk/test/CodeGenCUDA/function-overload.cu @@ -7,7 +7,8 @@ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ // RUN: -fcuda-target-overloads -emit-llvm -o - %s \ -// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \ +// RUN: -check-prefix=CHECK-DEVICE-STRICT %s // Check target overloads handling with disabled call target checks. // RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \ @@ -77,12 +78,112 @@ extern "C" __host__ __device__ int chd(void) {return 14;} // CHECK-BOTH: ret i32 14 +// HD functions are sometimes allowed to call H or D functions -- this +// is an artifact of the source-to-source splitting performed by nvcc +// that we need to mimic. During device mode compilation in nvcc, host +// functions aren't present at all, so don't participate in +// overloading. But in clang, H and D functions are present in both +// compilation modes. Clang normally uses the target attribute as a +// tiebreaker between overloads with otherwise identical priority, but +// in order to match nvcc's behavior, we sometimes need to wholly +// discard overloads that would not be present during compilation +// under nvcc. + +template T template_vs_function(T arg) { return 15; } +__device__ float template_vs_function(float arg) { return 16; } + +// Here we expect to call the templated function during host +// compilation, even if -fcuda-disable-target-call-checks is passed, +// and even though C++ overload rules prefer the non-templated +// function. +// CHECK-BOTH-LABEL: define void @_Z5hd_tfv() +__host__ __device__ void hd_tf(void) { + template_vs_function(1.0f); + // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float + // CHECK-DEVICE: call float @_Z20template_vs_functionf(float + template_vs_function(2.0); + // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double + // CHECK-DEVICE: call float @_Z20template_vs_functionf(float +} + +// Calls from __host__ and __device__ functions should always call the +// overloaded function that matches their mode. +// CHECK-HOST-LABEL: define void @_Z4h_tfv() +__host__ void h_tf() { + template_vs_function(1.0f); + // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float + template_vs_function(2.0); + // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double +} + +// CHECK-DEVICE-LABEL: define void @_Z4d_tfv() +__device__ void d_tf() { + template_vs_function(1.0f); + // CHECK-DEVICE: call float @_Z20template_vs_functionf(float + template_vs_function(2.0); + // CHECK-DEVICE: call float @_Z20template_vs_functionf(float +} + +// In case we have a mix of HD and H-only or D-only candidates in the +// overload set, normal C++ overload resolution rules apply first. +template T template_vs_hd_function(T arg) { return 15; } +__host__ __device__ float template_vs_hd_function(float arg) { return 16; } + +// CHECK-BOTH-LABEL: define void @_Z7hd_thdfv() +__host__ __device__ void hd_thdf() { + template_vs_hd_function(1.0f); + // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float + // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float + template_vs_hd_function(1); + // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32 + // CHECK-DEVICE-STRICT: call float @_Z23template_vs_hd_functionf(float + // CHECK-DEVICE-NC: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32 +} + +// CHECK-HOST-LABEL: define void @_Z6h_thdfv() +__host__ void h_thdf() { + template_vs_hd_function(1.0f); + // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float + template_vs_hd_function(1); + // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32 +} + +// CHECK-DEVICE-LABEL: define void @_Z6d_thdfv() +__device__ void d_thdf() { + template_vs_hd_function(1.0f); + // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float + template_vs_hd_function(1); + // Host-only function template is not callable with strict call checks, + // so for device side HD function will be the only choice. + // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float +} + +// Check that overloads still work the same way on both host and +// device side when the overload set contains only functions from one +// side of compilation. +__device__ float device_only_function(int arg) { return 17; } +__device__ float device_only_function(float arg) { return 18; } + +__host__ float host_only_function(int arg) { return 19; } +__host__ float host_only_function(float arg) { return 20; } + +// CHECK-BOTH-LABEL: define void @_Z6hd_dofv() +__host__ __device__ void hd_dof() { +#ifdef NOCHECKS + device_only_function(1.0f); + // CHECK-BOTH-NC: call float @_Z20device_only_functionf(float + device_only_function(1); + // CHECK-BOTH-NC: call float @_Z20device_only_functioni(i32 + host_only_function(1.0f); + // CHECK-BOTH-NC: call float @_Z18host_only_functionf(float + host_only_function(1); + // CHECK-BOTH-NC: call float @_Z18host_only_functioni(i32 +#endif +} + + // CHECK-HOST-LABEL: define void @_Z5hostfv() __host__ void hostf(void) { -#if defined (NOCHECKS) - fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp, - fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp, -#endif fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp, fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp, fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp, @@ -91,10 +192,6 @@ fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp, gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp, -#if defined (NOCHECKS) - d(); // CHECK-HOST-NC: call i32 @_Z1dv() - cd(); // CHECK-HOST-NC: call i32 @cd() -#endif h(); // CHECK-HOST: call i32 @_Z1hv() ch(); // CHECK-HOST: call i32 @ch() dh(); // CHECK-HOST: call i32 @_Z2dhv() @@ -106,10 +203,6 @@ __device__ void devicef(void) { fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp, fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp, -#if defined (NOCHECKS) - fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp, - fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp, -#endif fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp, fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp, fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp, @@ -117,10 +210,6 @@ d(); // CHECK-DEVICE: call i32 @_Z1dv() cd(); // CHECK-DEVICE: call i32 @cd() -#if defined (NOCHECKS) - h(); // CHECK-DEVICE-NC: call i32 @_Z1hv() - ch(); // CHECK-DEVICE-NC: call i32 @ch() -#endif dh(); // CHECK-DEVICE: call i32 @_Z2dhv() cdh(); // CHECK-DEVICE: call i32 @cdh() } Index: cfe/trunk/test/SemaCUDA/function-overload.cu =================================================================== --- cfe/trunk/test/SemaCUDA/function-overload.cu +++ cfe/trunk/test/SemaCUDA/function-overload.cu @@ -70,13 +70,11 @@ __host__ void hostf(void) { fp_t dp = d; - fp_t cdp = cd; -#if !defined(NOCHECKS) - // expected-error@-3 {{reference to __device__ function 'd' in __host__ function}} + // expected-error@-1 {{reference to __device__ function 'd' in __host__ function}} // expected-note@65 {{'d' declared here}} - // expected-error@-4 {{reference to __device__ function 'cd' in __host__ function}} + fp_t cdp = cd; + // expected-error@-1 {{reference to __device__ function 'cd' in __host__ function}} // expected-note@68 {{'cd' declared here}} -#endif fp_t hp = h; fp_t chp = ch; fp_t dhp = dh; @@ -84,13 +82,11 @@ gp_t gp = g; d(); - cd(); -#if !defined(NOCHECKS) - // expected-error@-3 {{no matching function for call to 'd'}} + // expected-error@-1 {{no matching function for call to 'd'}} // expected-note@65 {{candidate function not viable: call to __device__ function from __host__ function}} - // expected-error@-4 {{no matching function for call to 'cd'}} + cd(); + // expected-error@-1 {{no matching function for call to 'cd'}} // expected-note@68 {{candidate function not viable: call to __device__ function from __host__ function}} -#endif h(); ch(); dh(); @@ -104,13 +100,11 @@ fp_t dp = d; fp_t cdp = cd; fp_t hp = h; - fp_t chp = ch; -#if !defined(NOCHECKS) - // expected-error@-3 {{reference to __host__ function 'h' in __device__ function}} + // expected-error@-1 {{reference to __host__ function 'h' in __device__ function}} // expected-note@66 {{'h' declared here}} - // expected-error@-4 {{reference to __host__ function 'ch' in __device__ function}} + fp_t chp = ch; + // expected-error@-1 {{reference to __host__ function 'ch' in __device__ function}} // expected-note@69 {{'ch' declared here}} -#endif fp_t dhp = dh; fp_t cdhp = cdh; gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}} @@ -118,14 +112,10 @@ d(); cd(); - h(); - ch(); -#if !defined(NOCHECKS) - // expected-error@-3 {{no matching function for call to 'h'}} + h(); // expected-error {{no matching function for call to 'h'}} // expected-note@66 {{candidate function not viable: call to __host__ function from __device__ function}} - // expected-error@-4 {{no matching function for call to 'ch'}} + ch(); // expected-error {{no matching function for call to 'ch'}} // expected-note@69 {{candidate function not viable: call to __host__ function from __device__ function}} -#endif dh(); cdh(); g(); // expected-error {{no matching function for call to 'g'}} @@ -138,28 +128,25 @@ fp_t dp = d; fp_t cdp = cd; fp_t hp = h; - fp_t chp = ch; -#if !defined(NOCHECKS) - // expected-error@-3 {{reference to __host__ function 'h' in __global__ function}} + // expected-error@-1 {{reference to __host__ function 'h' in __global__ function}} // expected-note@66 {{'h' declared here}} - // expected-error@-4 {{reference to __host__ function 'ch' in __global__ function}} + fp_t chp = ch; + // expected-error@-1 {{reference to __host__ function 'ch' in __global__ function}} // expected-note@69 {{'ch' declared here}} -#endif fp_t dhp = dh; fp_t cdhp = cdh; - gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}} - // expected-note@67 {{'g' declared here}} + gp_t gp = g; + // expected-error@-1 {{reference to __global__ function 'g' in __global__ function}} + // expected-note@67 {{'g' declared here}} d(); cd(); h(); - ch(); -#if !defined(NOCHECKS) - // expected-error@-3 {{no matching function for call to 'h'}} + // expected-error@-1 {{no matching function for call to 'h'}} // expected-note@66 {{candidate function not viable: call to __host__ function from __global__ function}} - // expected-error@-4 {{no matching function for call to 'ch'}} + ch(); + // expected-error@-1 {{no matching function for call to 'ch'}} // expected-note@69 {{candidate function not viable: call to __host__ function from __global__ function}} -#endif dh(); cdh(); g(); // expected-error {{no matching function for call to 'g'}}