diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -1109,6 +1109,13 @@
                                               llvm::GlobalVariable *Addr,
                                               bool PerformInit);
 
+  /// Emit code for handling declare target functions in the runtime.
+  /// \param FD Declare target function.
+  /// \param GV Global value emitted for \a FD; used to register an offload
+  /// entry when \a FD is an active 'indirect' declare target function.
+  virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
+                                         llvm::GlobalValue *GV);
+
   /// Creates artificial threadprivate variable with name \p Name and type \p
   /// VarType.
   /// \param VarType Type of the artificial threadprivate variable.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1991,6 +1991,41 @@
   return CGM.getLangOpts().OpenMPIsTargetDevice;
 }
 
+void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
+                                                llvm::GlobalValue *GV) {
+  std::optional ActiveAttr =
+      OMPDeclareTargetDeclAttr::getActiveAttr(FD);
+
+  // We only need to handle active 'indirect' declare target functions.
+  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
+    return;
+
+  // Get a mangled name to store the new device global in.
+  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
+      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
+  SmallString<128> Name;
+  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
+
+  // We need to generate a new global to hold the address of the indirectly
+  // called device function. Doing this allows us to keep the visibility and
+  // linkage of the associated function unchanged while allowing the runtime to
+  // access its value.
+  llvm::GlobalValue *Addr = GV;
+  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+    Addr = new llvm::GlobalVariable(
+        CGM.getModule(), CGM.VoidPtrTy,
+        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
+        nullptr, llvm::GlobalValue::NotThreadLocal,
+        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
+    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+  }
+
+  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
+      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
+      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
+      llvm::GlobalValue::WeakODRLinkage);
+}
+
 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                           QualType VarType,
                                                           StringRef Name) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -5685,6 +5685,8 @@
     AddGlobalDtor(Fn, DA->getPriority(), true);
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, Fn);
+  if (getLangOpts().OpenMP && D->hasAttr<OMPDeclareTargetDeclAttr>())
+    getOpenMPRuntime().emitDeclareTargetFunction(D, GV);
 }
 
 void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
diff --git a/clang/test/OpenMP/target_indirect_codegen.cpp b/clang/test/OpenMP/target_indirect_codegen.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/target_indirect_codegen.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefix=DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -include-pch %t -o - | FileCheck %s --check-prefix=DEVICE
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+//.
+// HOST: @[[VAR:.+]] = global i8 0, align 1
+// HOST: @[[FOO_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[FOO_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_foo_l[0-9]+]]\00"
+// HOST: @.omp_offloading.entry.[[FOO_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @[[FOO_ENTRY_NAME]], i64 8, i32 4, i32 0 }, section "omp_offloading_entries", align 1
+// HOST: @[[BAZ_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[BAZ_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_baz_l[0-9]+]]\00"
+// HOST: @.omp_offloading.entry.[[BAZ_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_Z3bazv, ptr @[[BAZ_ENTRY_NAME]], i64 8, i32 4, i32 0 }, section "omp_offloading_entries", align 1
+// HOST: @[[VAR_ENTRY_NAME:.+]] = internal unnamed_addr constant [4 x i8] c"var\00"
+// HOST: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @[[VAR]], ptr @[[VAR_ENTRY_NAME]], i64 1, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+// HOST: @[[BAR_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[BAR_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_bar_l[0-9]+]]\00"
+// HOST: @.omp_offloading.entry.[[BAR_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_ZL3barv, ptr @[[BAR_ENTRY_NAME]], i64 8, i32 4, i32 0 }, section "omp_offloading_entries", align 1
+//.
+// DEVICE: @[[FOO_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_foo_l[0-9]+]] = protected addrspace(1) constant ptr @_Z3foov
+// DEVICE: @[[BAZ_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_baz_l[0-9]+]] = protected addrspace(1) constant ptr @_Z3bazv
+// DEVICE: @var = protected addrspace(1) global i8 0, align 1
+// DEVICE: @[[BAR_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_bar_l[0-9]+]] = protected addrspace(1) constant ptr @_ZL3barv
+//.
+void foo() { }
+#pragma omp declare target to(foo) indirect
+
+static void bar() { }
+#pragma omp declare target to(bar) indirect
+
+[[gnu::visibility("hidden")]] void baz() { bar(); }
+#pragma omp declare target to(baz) indirect
+
+static void unused() { };
+#pragma omp declare target to(unused) indirect
+
+void disabled() { };
+#pragma omp declare target to(disabled) indirect(false)
+
+char var = 0;
+#pragma omp declare target to(var) indirect
+
+#endif
+//.
+// HOST-DAG: !{{[0-9]+}} = !{i32 1, !"[[FOO_NAME]]", i32 4, i32 0}
+// HOST-DAG: !{{[0-9]+}} = !{i32 1, !"[[BAZ_NAME]]", i32 4, i32 1}
+// HOST-DAG: !{{[0-9]+}} = !{i32 1, !"var", i32 0, i32 2}
+// HOST-DAG: !{{[0-9]+}} = !{i32 1, !"[[BAR_NAME]]", i32 4, i32 3}
+//.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -326,6 +326,8 @@
     OMPTargetGlobalVarEntryEnter = 0x2,
     /// Mark the entry as having no declare target entry kind.
     OMPTargetGlobalVarEntryNone = 0x3,
+    /// Mark the entry as a declare target indirect global.
+    OMPTargetGlobalVarEntryIndirect = 0x4,
   };
 
   /// Kind of device clause for declare target variables
@@ -349,6 +351,7 @@
     /// Type of the global variable.
     int64_t VarSize;
     GlobalValue::LinkageTypes Linkage;
+    const std::string VarName;
 
   public:
     OffloadEntryInfoDeviceGlobalVar()
@@ -359,13 +362,15 @@
     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
                                              int64_t VarSize,
                                              OMPTargetGlobalVarEntryKind Flags,
-                                             GlobalValue::LinkageTypes Linkage)
+                                             GlobalValue::LinkageTypes Linkage,
+                                             const std::string &VarName)
         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
-          VarSize(VarSize), Linkage(Linkage) {
+          VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
       setAddress(Addr);
     }
 
     int64_t getVarSize() const { return VarSize; }
+    StringRef getVarName() const { return VarName; }
     void setVarSize(int64_t Size) { VarSize = Size; }
     GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
     void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
@@ -1734,7 +1739,8 @@
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
-                          int32_t Flags, GlobalValue::LinkageTypes);
+                          int32_t Flags, GlobalValue::LinkageTypes,
+                          StringRef Name = "");
 
   /// The kind of errors that can occur when emitting the offload entries and
   /// metadata.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5525,9 +5525,10 @@
 
 void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
                                          uint64_t Size, int32_t Flags,
-                                         GlobalValue::LinkageTypes) {
+                                         GlobalValue::LinkageTypes,
+                                         StringRef Name) {
   if (!Config.isGPU()) {
-    emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
+    emitOffloadingEntry(ID, Name.empty() ? Addr->getName() : Name, Size, Flags);
     return;
   }
   // TODO: Add support for global variables on the device after declare target
@@ -5687,13 +5688,20 @@
 
       // Hidden or internal symbols on the device are not externally visible.
       // We should not attempt to register them by creating an offloading
-      // entry.
+      // entry. Indirect variables are handled separately on the device.
       if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
-        if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+        if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
+            Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
           continue;
 
-      createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
-                         Flags, CE->getLinkage());
+      // Indirect globals need to use a special name that doesn't match the name
+      // of the associated host global.
+      if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+        createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
+                           Flags, CE->getLinkage(), CE->getVarName());
+      else
+        createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
+                           Flags, CE->getLinkage());
 
     } else {
       llvm_unreachable("Unsupported entry kind.");
@@ -6038,8 +6046,13 @@
       }
       return;
     }
-    OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
-                                              Addr, VarSize, Flags, Linkage);
+    if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+      OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
+                                                Addr, VarSize, Flags, Linkage,
+                                                VarName.str());
+    else
+      OffloadEntriesDeviceGlobalVar.try_emplace(
+          VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, "");
     ++OffloadingEntriesNum;
   }
 }