diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -249,6 +249,15 @@
       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
     return CFP_Native;
 
+  // HipStdPar mode is special: whether a device-side call to a host target is
+  // valid cannot be adjudicated unambiguously in the AST and is deferred to a
+  // subsequent pass, hence we optimistically allow such calls to pass here.
+  if (getLangOpts().HIPStdPar &&
+      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
+       CallerTarget == CFT_HostDevice) &&
+      CalleeTarget == CFT_Host)
+    return CFP_HostDevice;
+
   // (d) HostDevice behavior depends on compilation mode.
   if (CallerTarget == CFT_HostDevice) {
     // It's OK to call a compilation-mode matching function from an HD one.
@@ -895,7 +904,7 @@
   if (!ShouldCheck || !Capture.isReferenceCapture())
     return;
   auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
-  if (Capture.isVariableCapture()) {
+  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
     SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                           diag::err_capture_bad_target, Callee, *this)
         << Capture.getVariable();
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19157,7 +19157,7 @@
       // Diagnose ODR-use of host global variables in device functions.
       // Reference of device global variables in host functions is allowed
      // through shadow variables therefore it is not diagnosed.
-      if (SemaRef.LangOpts.CUDAIsDevice) {
+      if (SemaRef.LangOpts.CUDAIsDevice && !SemaRef.LangOpts.HIPStdPar) {
         SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
             << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget;
         SemaRef.targetDiag(Var->getLocation(),
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -271,7 +271,8 @@
       OutputName = Names[i]->getName();
 
     TargetInfo::ConstraintInfo Info(Literal->getString(), OutputName);
-    if (!Context.getTargetInfo().validateOutputConstraint(Info)) {
+    if (!Context.getTargetInfo().validateOutputConstraint(Info) &&
+        !(LangOpts.HIPStdPar && LangOpts.CUDAIsDevice)) {
       targetDiag(Literal->getBeginLoc(),
                  diag::err_asm_invalid_output_constraint)
           << Info.getConstraintStr();
diff --git a/clang/test/SemaHipStdPar/device-can-call-host.cpp b/clang/test/SemaHipStdPar/device-can-call-host.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaHipStdPar/device-can-call-host.cpp
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -x hip %s --hipstdpar -triple amdgcn-amd-amdhsa --std=c++17 \
+// RUN:   -fcuda-is-device -emit-llvm -o /dev/null -verify
+
+// Note: These would happen implicitly, within the implementation of the
+//       accelerator specific algorithm library, and not from user code.
+
+// Calls from the accelerator side to implicitly host (i.e. unannotated)
+// functions are fine.
+
+// expected-no-diagnostics
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+
+extern "C" void host_fn() {}
+
+struct Dummy {};
+
+struct S {
+  S() {}
+  ~S() { host_fn(); }
+
+  int x;
+};
+
+struct T {
+  __device__ void hd() { host_fn(); }
+
+  __device__ void hd3();
+
+  void h() {}
+
+  void operator+();
+  void operator-(const T&) {}
+
+  operator Dummy() { return Dummy(); }
+};
+
+__device__ void T::hd3() { host_fn(); }
+
+template <typename T> __device__ void hd2() { host_fn(); }
+
+__global__ void kernel() { hd2<int>(); }
+
+__device__ void hd() { host_fn(); }
+
+template <typename T> __device__ void hd3() { host_fn(); }
+__device__ void device_fn() { hd3<int>(); }
+
+__device__ void local_var() {
+  S s;
+}
+
+__device__ void explicit_destructor(S *s) {
+  s->~S();
+}
+
+__device__ void hd_member_fn() {
+  T t;
+
+  t.hd();
+}
+
+__device__ void h_member_fn() {
+  T t;
+  t.h();
+}
+
+__device__ void unaryOp() {
+  T t;
+  (void) +t;
+}
+
+__device__ void binaryOp() {
+  T t;
+  (void) (t - t);
+}
+
+__device__ void implicitConversion() {
+  T t;
+  Dummy d = t;
+}
+
+template <typename T>
+struct TmplStruct {
+  template <typename U> __device__ void fn() {}
+};
+
+template <>
+template <>
+__device__ void TmplStruct<int>::fn<int>() { host_fn(); }
+
+__device__ void double_specialization() { TmplStruct<int>().fn<int>(); }