Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -6067,6 +6067,9 @@ def err_ref_bad_target : Error< "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " "function %1 in %select{__device__|__global__|__host__|__host__ __device__}2 function">; +def warn_host_calls_from_host_device : Warning< + "calling __host__ function %0 from __host__ __device__ function %1 can lead to runtime errors">, + InGroup; def warn_non_pod_vararg_with_format_string : Warning< "cannot pass %select{non-POD|non-trivial}0 object of type %1 to variadic " Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -160,6 +160,7 @@ LANGOPT(CUDA , 1, 0, "CUDA") LANGOPT(OpenMP , 1, 0, "OpenMP support") LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device") +LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -608,6 +608,9 @@ def fcuda_is_device : Flag<["-"], "fcuda-is-device">, HelpText<"Generate code for CUDA device">; +def fcuda_allow_host_calls_from_host_device : Flag<["-"], + "fcuda-allow-host-calls-from-host-device">, + HelpText<"Allow host device functions to call host functions">; } // let Flags = [CC1Option] Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -253,7 +253,7 @@ for (unsigned i = 0, e = checkers.size(); i != e; ++i) Opts.CheckersControlList.push_back(std::make_pair(checkers[i], enable)); } - + // Go through the analyzer configuration options. for (arg_iterator it = Args.filtered_begin(OPT_analyzer_config), ie = Args.filtered_end(); it != ie; ++it) { @@ -1393,6 +1393,9 @@ if (Args.hasArg(OPT_fcuda_is_device)) Opts.CUDAIsDevice = 1; + if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device)) + Opts.CUDAAllowHostCallsFromHostDevice = 1; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); Index: lib/Sema/SemaCUDA.cpp =================================================================== --- lib/Sema/SemaCUDA.cpp +++ lib/Sema/SemaCUDA.cpp @@ -92,9 +92,21 @@ if (Caller->isImplicit()) return false; bool InDeviceMode = getLangOpts().CUDAIsDevice; - if ((InDeviceMode && CalleeTarget != CFT_Device) || - (!InDeviceMode && CalleeTarget != CFT_Host)) + if (!InDeviceMode && CalleeTarget != CFT_Host) + return true; + if (InDeviceMode && CalleeTarget != CFT_Device) { + // Allow host device functions to call host functions if explicitly + // requested. + if (CalleeTarget == CFT_Host && + getLangOpts().CUDAAllowHostCallsFromHostDevice) { + Diag(Caller->getLocation(), + diag::warn_host_calls_from_host_device) + << Callee->getNameAsString() << Caller->getNameAsString(); + return false; + } + return true; + } } return false; Index: test/CodeGenCUDA/host-device-calls-host.cu =================================================================== --- test/CodeGenCUDA/host-device-calls-host.cu +++ test/CodeGenCUDA/host-device-calls-host.cu @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-allow-host-calls-from-host-device -fcuda-is-device -Wno-cuda-compat -emit-llvm -o - | FileCheck %s + +#include "Inputs/cuda.h" + +extern "C" +void host_function() {} + +// CHECK-LABEL: define void @hd_function_a +extern "C" +__host__ __device__ void hd_function_a() { + // CHECK: call void @host_function + host_function(); +} + +// CHECK: declare void @host_function + +// CHECK-LABEL: define void @hd_function_b +extern "C" +__host__ __device__ void hd_function_b(bool b) { if (b) host_function(); } + +// CHECK-LABEL: define void @device_function_b +extern "C" +__device__ void device_function_b() { hd_function_b(false); } + +// CHECK-LABEL: define void @global_function +extern "C" +__global__ void global_function() { + // CHECK: call void @device_function_b + device_function_b(); +} + +// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} Index: test/SemaCUDA/function-target-hd.cu =================================================================== --- test/SemaCUDA/function-target-hd.cu +++ test/SemaCUDA/function-target-hd.cu @@ -0,0 +1,71 @@ +// Test the Sema analysis of caller-callee relationships of host device +// functions when compiling CUDA code. There are 4 permutations of this test as +// host and device compilation are separate compilation passes, and clang has +// an option to allow host calls from host device functions. __CUDA_ARCH__ is +// defined when compiling for the device and TEST_WARN_HD when host calls are +// allowed from host device functions. So for example, if __CUDA_ARCH__ is +// defined and TEST_WARN_HD is not then device compilation is happening but +// host device functions are not allowed to call device functions. + +// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD + +#include "Inputs/cuda.h" + +__host__ void hd1h(void); +#if defined(__CUDA_ARCH__) && !defined(TEST_WARN_HD) +// expected-note@-2 {{candidate function not viable: call to __host__ function from __host__ __device__ function}} +#endif +__device__ void hd1d(void); +#ifndef __CUDA_ARCH__ +// expected-note@-2 {{candidate function not viable: call to __device__ function from __host__ __device__ function}} +#endif +__host__ void hd1hg(void); +__device__ void hd1dg(void); +#ifdef __CUDA_ARCH__ +__host__ void hd1hig(void); +#if !defined(TEST_WARN_HD) +// expected-note@-2 {{candidate function not viable: call to __host__ function from __host__ __device__ function}} +#endif +#else +__device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}} +#endif +__host__ __device__ void hd1hd(void); +__global__ void hd1g(void); // expected-note {{'hd1g' declared here}} + +__host__ __device__ void hd1(void) { +#if defined(TEST_WARN_HD) && defined(__CUDA_ARCH__) +// expected-warning@-2 {{calling __host__ function hd1h from __host__ __device__ function hd1}} +// expected-warning@-3 {{calling __host__ function hd1hig from __host__ __device__ function hd1}} +#endif + hd1d(); +#ifndef __CUDA_ARCH__ +// expected-error@-2 {{no matching function}} +#endif + hd1h(); +#if defined(__CUDA_ARCH__) && !defined(TEST_WARN_HD) +// expected-error@-2 {{no matching function}} +#endif + + // No errors as guarded +#ifdef __CUDA_ARCH__ + hd1d(); +#else + hd1h(); +#endif + + // Errors as incorrectly guarded +#ifndef __CUDA_ARCH__ + hd1dig(); // expected-error {{no matching function}} +#else + hd1hig(); +#ifndef TEST_WARN_HD +// expected-error@-2 {{no matching function}} +#endif +#endif + + hd1hd(); + hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}} +} Index: test/SemaCUDA/function-target.cu =================================================================== --- test/SemaCUDA/function-target.cu +++ test/SemaCUDA/function-target.cu @@ -31,41 +31,3 @@ d1hd(); d1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'd1g' in __device__ function}} } - -// Expected 0-1 as in one of host/device side compilation it is an error, while -// not in the other -__host__ void hd1h(void); // expected-note 0-1 {{candidate function not viable: call to __host__ function from __host__ __device__ function}} -__device__ void hd1d(void); // expected-note 0-1 {{candidate function not viable: call to __device__ function from __host__ __device__ function}} -__host__ void hd1hg(void); -__device__ void hd1dg(void); -#ifdef __CUDA_ARCH__ -__host__ void hd1hig(void); // expected-note {{candidate function not viable: call to __host__ function from __host__ __device__ function}} -#else -__device__ void hd1dig(void); // expected-note {{candidate function not viable: call to __device__ function from __host__ __device__ function}} -#endif -__host__ __device__ void hd1hd(void); -__global__ void hd1g(void); // expected-note {{'hd1g' declared here}} - -__host__ __device__ void hd1(void) { - // Expected 0-1 as in one of host/device side compilation it is an error, - // while not in the other - hd1d(); // expected-error 0-1 {{no matching function}} - hd1h(); // expected-error 0-1 {{no matching function}} - - // No errors as guarded -#ifdef __CUDA_ARCH__ - hd1d(); -#else - hd1h(); -#endif - - // Errors as incorrectly guarded -#ifndef __CUDA_ARCH__ - hd1dig(); // expected-error {{no matching function}} -#else - hd1hig(); // expected-error {{no matching function}} -#endif - - hd1hd(); - hd1g<<<1, 1>>>(); // expected-error {{reference to __global__ function 'hd1g' in __host__ __device__ function}} -}