Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -231,6 +231,15 @@ (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) return CFP_Native; + // StdPar mode is special, in that assessing whether a device side call to a + // host target is deferred to a subsequent pass, and cannot unambiguously be + // adjudicated in the AST, hence we optimistically allow them to pass here. + if (getLangOpts().HIPStdPar && + (CallerTarget == CFT_Global || CallerTarget == CFT_Device || + CallerTarget == CFT_HostDevice) && + CalleeTarget == CFT_Host) + return CFP_HostDevice; + // (d) HostDevice behavior depends on compilation mode. if (CallerTarget == CFT_HostDevice) { // It's OK to call a compilation-mode matching function from an HD one. Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -19106,7 +19106,7 @@ // Diagnose ODR-use of host global variables in device functions. // Reference of device global variables in host functions is allowed // through shadow variables therefore it is not diagnosed. - if (SemaRef.LangOpts.CUDAIsDevice) { + if (SemaRef.LangOpts.CUDAIsDevice && !SemaRef.LangOpts.HIPStdPar) { SemaRef.targetDiag(Loc, diag::err_ref_bad_target) << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget; SemaRef.targetDiag(Var->getLocation(), Index: clang/lib/Sema/SemaStmtAsm.cpp =================================================================== --- clang/lib/Sema/SemaStmtAsm.cpp +++ clang/lib/Sema/SemaStmtAsm.cpp @@ -271,7 +271,8 @@ OutputName = Names[i]->getName(); TargetInfo::ConstraintInfo Info(Literal->getString(), OutputName); - if (!Context.getTargetInfo().validateOutputConstraint(Info)) { + if (!Context.getTargetInfo().validateOutputConstraint(Info) && + !(LangOpts.HIPStdPar && LangOpts.CUDAIsDevice)) { targetDiag(Literal->getBeginLoc(), diag::err_asm_invalid_output_constraint) << Info.getConstraintStr(); Index: clang/test/SemaStdPar/device-can-call-host.cpp =================================================================== --- /dev/null +++ clang/test/SemaStdPar/device-can-call-host.cpp @@ -0,0 +1,91 @@ +// RUN: %clang %s --stdpar --stdpar-path=%S/Inputs \ +// RUN: --stdpar-thrust-path=%S/Inputs --stdpar-prim-path=%S/Inputs \ +// RUN: --offload-device-only -emit-llvm -o /dev/null -Xclang -verify + +// Note: These would happen implicitly, within the implementation of the +// accelerator specific algorithm library, and not from user code. + +// Calls from the accelerator side to implicitly host (i.e. unannotated) +// functions are fine. + +// expected-no-diagnostics + +extern "C" void host_fn() {} + +struct Dummy {}; + +struct S { + S() {} + ~S() { host_fn(); } + + int x; +}; + +struct T { + __device__ void hd() { host_fn(); } + + __device__ void hd3(); + + void h() {} + + void operator+(); + void operator-(const T&) {} + + operator Dummy() { return Dummy(); } +}; + +__device__ void T::hd3() { host_fn(); } + +template __device__ void hd2() { host_fn(); } + +__global__ void kernel() { hd2(); } + +__device__ void hd() { host_fn(); } + +template __device__ void hd3() { host_fn(); } +__device__ void device_fn() { hd3(); } + +__device__ void local_var() { + S s; +} + +__device__ void explicit_destructor(S *s) { + s->~S(); +} + +__device__ void hd_member_fn() { + T t; + + t.hd(); +} + +__device__ void h_member_fn() { + T t; + t.h(); +} + +__device__ void unaryOp() { + T t; + (void) +t; +} + +__device__ void binaryOp() { + T t; + (void) (t - t); +} + +__device__ void implicitConversion() { + T t; + Dummy d = t; +} + +template +struct TmplStruct { + template __device__ void fn() {} +}; + +template <> +template <> +__device__ void TmplStruct::fn() { host_fn(); } + +__device__ void double_specialization() { TmplStruct().fn(); }