Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -3457,6 +3457,19 @@ bool DiagnoseMissing); bool isKnownName(StringRef name); + /// Status of the function emission on the CUDA/HIP/OpenMP host/device attrs. + enum class FunctionEmissionStatus { + Emitted, + CUDADiscarded, // Discarded due to CUDA/HIP hostness + OMPDiscarded, // Discarded due to OpenMP hostness + TemplateDiscarded, // Discarded due to uninstantiated templates + Unknown, + }; + FunctionEmissionStatus getEmissionStatus(FunctionDecl *Decl); + + // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. + bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); + void ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc, ArrayRef Args, ADLResult &Functions); Index: lib/Sema/SemaCUDA.cpp =================================================================== --- lib/Sema/SemaCUDA.cpp +++ lib/Sema/SemaCUDA.cpp @@ -603,36 +603,7 @@ // Do we know that we will eventually codegen the given function? static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return false; - - // When compiling for device, host functions are never emitted. Similarly, - // when compiling for host, device and global functions are never emitted. - // (Technically, we do emit a host-side stub for global functions, but this - // doesn't count for our purposes here.) - Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); - if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) - return false; - if (!S.getLangOpts().CUDAIsDevice && - (T == Sema::CFT_Device || T == Sema::CFT_Global)) - return false; - - // Check whether this function is externally visible -- if so, it's - // known-emitted. - // - // We have to check the GVA linkage of the function's *definition* -- if we - // only have a declaration, we don't know whether or not the function will be - // emitted, because (say) the definition could include "inline". - FunctionDecl *Def = FD->getDefinition(); - - if (Def && - !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) - return true; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return S.DeviceKnownEmittedFns.count(FD) > 0; + return S.getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted; } Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, @@ -707,7 +678,7 @@ if (CallerKnownEmitted) { // Host-side references to a __global__ function refer to the stub, so the // function itself is never emitted and therefore should not be marked. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + if (!shouldIgnoreInHostDeviceCheck(Callee)) markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted); } else { // If we have @@ -716,7 +687,7 @@ // omitting at the call to the kernel from the callgraph. This ensures // that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + if (!shouldIgnoreInHostDeviceCheck(Callee)) DeviceCallGraph[Caller].insert({Callee, Loc}); } Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -17609,3 +17609,71 @@ Decl *Sema::getObjCDeclContext() const { return (dyn_cast_or_null(CurContext)); } + +Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) { + // Templates are emitted when they're instantiated. + if (FD->isDependentContext()) + return FunctionEmissionStatus::TemplateDiscarded; + + if (LangOpts.CUDA) { + // When compiling for device, host functions are never emitted. Similarly, + // when compiling for host, device and global functions are never emitted. + // (Technically, we do emit a host-side stub for global functions, but this + // doesn't count for our purposes here.) + Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD); + if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host) + return FunctionEmissionStatus::CUDADiscarded; + if (!LangOpts.CUDAIsDevice && + (T == Sema::CFT_Device || T == Sema::CFT_Global)) + return FunctionEmissionStatus::CUDADiscarded; + + // Check whether this function is externally visible -- if so, it's + // known-emitted. + // + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will + // be emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + if (Def && + !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def))) + return FunctionEmissionStatus::Emitted; + } else if (LangOpts.OpenMPIsDevice) { + Optional DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + if (DevTy.hasValue()) + return (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) + ? FunctionEmissionStatus::OMPDiscarded + : FunctionEmissionStatus::Emitted; + } else if (LangOpts.OpenMP) { + // In OpenMP 4.5 all the functions are host functions. + if (LangOpts.OpenMP <= 45) + return FunctionEmissionStatus::Emitted; + + Optional DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + if (DevTy.hasValue()) + return (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + ? FunctionEmissionStatus::OMPDiscarded + : FunctionEmissionStatus::Emitted; + } + + // Otherwise, the function is known-emitted if it's in our set of + // known-emitted functions. + return (DeviceKnownEmittedFns.count(FD) > 0) + ? FunctionEmissionStatus::Emitted + : FunctionEmissionStatus::Unknown; +} + +bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) { + // Host-side references to a __global__ function refer to the stub, so the + // function itself is never emitted and therefore should not be marked. + // If we have + // host fn calls kernel fn calls host+device, + // the HD function does not get instantiated on the host. We model this by + // omitting at the call to the kernel from the callgraph. This ensures + // that, when compiling for host, only HD functions actually called from the + // host get marked as known-emitted. + return LangOpts.CUDA && !LangOpts.CUDAIsDevice && + IdentifyCUDATarget(Callee) == CFT_Global; +} Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -1566,25 +1566,11 @@ } // anonymous namespace /// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownDeviceEmitted(Sema &S, FunctionDecl *FD) { +static Sema::FunctionEmissionStatus isKnownDeviceEmitted(Sema &S, + FunctionDecl *FD) { assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice && "Expected OpenMP device compilation."); - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return FunctionEmissionStatus::Discarded; - - Optional DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; + return S.getEmissionStatus(FD); } Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, @@ -1602,7 +1588,9 @@ Kind = isOpenMPDeviceDelayedContext(*this) ? DeviceDiagBuilder::K_Deferred : DeviceDiagBuilder::K_Immediate; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: + case FunctionEmissionStatus::CUDADiscarded: Kind = DeviceDiagBuilder::K_Nop; break; } @@ -1611,25 +1599,11 @@ } /// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownHostEmitted(Sema &S, FunctionDecl *FD) { +static Sema::FunctionEmissionStatus isKnownHostEmitted(Sema &S, + FunctionDecl *FD) { assert(S.LangOpts.OpenMP && !S.LangOpts.OpenMPIsDevice && "Expected OpenMP host compilation."); - // In OpenMP 4.5 all the functions are host functions. - if (S.LangOpts.OpenMP <= 45) - return FunctionEmissionStatus::Emitted; - - Optional DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; + return S.getEmissionStatus(FD); } Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, @@ -1646,7 +1620,9 @@ case FunctionEmissionStatus::Unknown: Kind = DeviceDiagBuilder::K_Deferred; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: + case FunctionEmissionStatus::CUDADiscarded: Kind = DeviceDiagBuilder::K_Nop; break; } @@ -1669,7 +1645,7 @@ isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Unknown)) && isKnownDeviceEmitted(*this, Callee) == - FunctionEmissionStatus::Discarded) { + FunctionEmissionStatus::OMPDiscarded) { StringRef HostDevTy = getOpenMPSimpleClauseTypeName(OMPC_device_type, OMPC_DEVICE_TYPE_host); Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; @@ -1705,7 +1681,8 @@ // device only function are not available on the host. if (Caller && isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted && - isKnownHostEmitted(*this, Callee) == FunctionEmissionStatus::Discarded) { + isKnownHostEmitted(*this, Callee) == + FunctionEmissionStatus::OMPDiscarded) { StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( OMPC_device_type, OMPC_DEVICE_TYPE_nohost); Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; @@ -1716,16 +1693,18 @@ } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - if ((!CheckCaller && !Caller) || - (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) - markKnownEmitted( - *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { - return CheckCaller && - isKnownHostEmitted(S, FD) == FunctionEmissionStatus::Emitted; - }); - else if (Caller) - DeviceCallGraph[Caller].insert({Callee, Loc}); + if (!shouldIgnoreInHostDeviceCheck(Callee)) { + if ((!CheckCaller && !Caller) || + (Caller && + isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) + markKnownEmitted( + *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { + return CheckCaller && + isKnownHostEmitted(S, FD) == FunctionEmissionStatus::Emitted; + }); + else if (Caller) + DeviceCallGraph[Caller].insert({Callee, Loc}); + } } void Sema::checkOpenMPDeviceExpr(const Expr *E) { Index: test/SemaCUDA/call-device-fn-from-host.cu =================================================================== --- test/SemaCUDA/call-device-fn-from-host.cu +++ test/SemaCUDA/call-device-fn-from-host.cu @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ // RUN: -verify -verify-ignore-unexpected=note +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -verify-ignore-unexpected=note -fopenmp // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. Index: test/SemaCUDA/host-device-constexpr.cu =================================================================== --- test/SemaCUDA/host-device-constexpr.cu +++ test/SemaCUDA/host-device-constexpr.cu @@ -1,5 +1,10 @@ // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s \ +// RUN: -fcuda-is-device +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \ +// RUN: -fopenmp %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \ +// RUN: -fopenmp %s -fcuda-is-device #include "Inputs/cuda.h" Index: test/SemaCUDA/openmp-static-func.cu =================================================================== --- /dev/null +++ test/SemaCUDA/openmp-static-func.cu @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify -fopenmp %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify -fopenmp -x hip %s +// expected-no-diagnostics + +// Tests there is no assertion in Sema::markKnownEmitted when fopenmp is used +// with CUDA/HIP host compilation. + +static void f() {} + +static void g() { f(); } + +static void h() { g(); }