Index: lib/CodeGen/CGDeclCXX.cpp =================================================================== --- lib/CodeGen/CGDeclCXX.cpp +++ lib/CodeGen/CGDeclCXX.cpp @@ -74,7 +74,7 @@ // bails even if the attribute is not present. if (D.isNoDestroy(CGF.getContext())) return; - + CodeGenModule &CGM = CGF.CGM; // FIXME: __attribute__((cleanup)) ? Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -2294,6 +2294,18 @@ return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } +static Address emitDeclTargetToVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, QualType T) { + llvm::Optional Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) + return Address::invalid(); + assert(*Res == OMPDeclareTargetDeclAttr::MT_To && "Expected to clause"); + QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); + Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetToUnderUnifiedMem(VD); + return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs()); +} + static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType T) { llvm::Optional Res = @@ -2359,6 +2371,9 @@ // device codegen. if (CGF.getLangOpts().OpenMPIsDevice) { Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T); + if (!Addr.isValid() && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) + Addr = emitDeclTargetToVarDeclLValue(CGF, VD, T); if (Addr.isValid()) return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -1120,6 +1120,8 @@ Address VDAddr, SourceLocation Loc); + virtual Address getAddrOfDeclareTargetToUnderUnifiedMem(const VarDecl *VD); + /// Returns the address of the variable marked as declare target with link /// clause. virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD); Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -2531,11 +2531,16 @@ return Address::invalid(); llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { + if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory))) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; + if (*Res == OMPDeclareTargetDeclAttr::MT_Link) + OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; + else + OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_to_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { @@ -2555,6 +2560,11 @@ return Address::invalid(); } +Address +CGOpenMPRuntime::getAddrOfDeclareTargetToUnderUnifiedMem(const VarDecl *VD) { + return getAddrOfDeclareTargetLink(VD); +} + llvm::Constant * CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { assert(!CGM.getLangOpts().OpenMPUseTLS || @@ -2752,7 +2762,9 @@ bool PerformInit) { Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) @@ -4168,6 +4180,9 @@ CE->getFlags()); switch (Flags) { case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { + if (CGM.getLangOpts().OpenMPIsDevice && + CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) + continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, @@ -7439,11 +7454,18 @@ if (const auto *VD = dyn_cast_or_null(I->getAssociatedDeclaration())) { if (llvm::Optional Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { IsLink = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); } + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) { + // TODO: Make this into a flag for TO with unified memory. + IsLink = true; + BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetToUnderUnifiedMem(VD); + } + } } // If the variable is a pointer and is being dereferenced (i.e. is not @@ -9087,7 +9109,9 @@ llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( cast(GD.getDecl())); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast(GD.getDecl())); return true; } @@ -9149,6 +9173,18 @@ switch (*Res) { case OMPDeclareTargetDeclAttr::MT_To: Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + if (HasRequiresUnifiedSharedMemory) { + if (CGM.getLangOpts().OpenMPIsDevice) { + VarName = Addr->getName(); + Addr = nullptr; + } else { + VarName = getAddrOfDeclareTargetToUnderUnifiedMem(VD).getName(); + Addr = cast(getAddrOfDeclareTargetToUnderUnifiedMem(VD).getPointer()); + } + VarSize = CGM.getPointerSize(); + Linkage = llvm::GlobalValue::WeakAnyLinkage; + break; + } VarName = CGM.getMangledName(VD); if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); @@ -9202,12 +9238,17 @@ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !HasRequiresUnifiedSharedMemory) { CGM.EmitGlobal(VD); } else { - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && + assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || + *Res == OMPDeclareTargetDeclAttr::MT_To) && "Expected to or link clauses."); - (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + if (*Res == OMPDeclareTargetDeclAttr::MT_To) + (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetToUnderUnifiedMem(VD); + else + (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); } } } Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -2477,10 +2477,13 @@ if (llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { if (*Res == OMPDeclareTargetDeclAttr::MT_To) { - (void)GetAddrOfGlobalVar(VD); + if (getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) + (void)getOpenMPRuntime().getAddrOfDeclareTargetToUnderUnifiedMem(VD); + else + (void)GetAddrOfGlobalVar(VD); } else { assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && - "link claue expected."); + "link clause expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); } return; Index: test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp =================================================================== --- test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp +++ test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp @@ -8,16 +8,18 @@ #define N 1000 double var = 10.0; +double to_var = 20.0; #pragma omp requires unified_shared_memory #pragma omp declare target link(var) +#pragma omp declare target to(to_var) int bar(int n){ double sum = 0; #pragma omp target for(int i = 0; i < n; i++) { - sum += var; + sum += var + to_var; } return sum; @@ -26,9 +28,20 @@ // CHECK: [[VAR:@.+]] = global double 1.000000e+01 // CHECK: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]] +// CHECK: [[TO_VAR:@.+]] = global double 2.000000e+01 +// CHECK: [[VAR_DECL_TGT_TO_PTR:@.+]] = global double* [[TO_VAR]] + // CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8] // CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800] +// CHECK: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [22 x i8] +// CHECK: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR:@.+]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (double** [[VAR_DECL_TGT_LINK_PTR]] to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME]], i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries" + +// CHECK: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [23 x i8] +// CHECK: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR:@.+]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (double** [[VAR_DECL_TGT_TO_PTR]] to i8*), i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME]], i32 0, i32 0), i64 8, i32 0, i32 0 }, section ".omp_offloading.entries" + +// CHECK: @llvm.used = appending global [2 x i8*] [i8* bitcast (double** [[VAR_DECL_TGT_LINK_PTR]] to i8*), i8* bitcast (double** [[VAR_DECL_TGT_TO_PTR]] to i8*)], section "llvm.metadata" + // CHECK: [[N_CASTED:%.+]] = alloca i64 // CHECK: [[SUM_CASTED:%.+]] = alloca i64