Index: include/clang/AST/OpenMPClause.h =================================================================== --- include/clang/AST/OpenMPClause.h +++ include/clang/AST/OpenMPClause.h @@ -4228,50 +4228,153 @@ /// 'use_device_ptr' with the variables 'a' and 'b'. /// class OMPUseDevicePtrClause final - : public OMPVarListClause, - private llvm::TrailingObjects { + : public OMPMappableExprListClause, + private llvm::TrailingObjects< + OMPUseDevicePtrClause, Expr *, ValueDecl *, unsigned, + OMPClauseMappableExprCommon::MappableComponent> { friend TrailingObjects; friend OMPVarListClause; + friend OMPMappableExprListClause; friend class OMPClauseReader; - /// Build clause with number of variables \a N. + + /// Define the sizes of each trailing object array except the last one. This + /// is required for TrailingObjects to work properly. + size_t numTrailingObjects(OverloadToken) const { + return 3 * varlist_size(); + } + size_t numTrailingObjects(OverloadToken) const { + return getUniqueDeclarationsNum(); + } + size_t numTrailingObjects(OverloadToken) const { + return getUniqueDeclarationsNum() + getTotalComponentListNum(); + } + + /// \brief Build clause with number of variables \a NumVars. /// /// \param StartLoc Starting location of the clause. - /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - /// \param N Number of the variables in the clause. + /// \param NumVars Number of expressions listed in this clause. + /// \param NumUniqueDeclarations Number of unique base declarations in this + /// clause. + /// \param NumComponentLists Number of component lists in this clause. + /// \param NumComponents Total number of expression components in the clause. /// - OMPUseDevicePtrClause(SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc, unsigned N) - : OMPVarListClause(OMPC_use_device_ptr, StartLoc, - LParenLoc, EndLoc, N) {} + explicit OMPUseDevicePtrClause(SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, unsigned NumVars, + unsigned NumUniqueDeclarations, + unsigned NumComponentLists, + unsigned NumComponents) + : OMPMappableExprListClause(OMPC_use_device_ptr, StartLoc, LParenLoc, + EndLoc, NumVars, NumUniqueDeclarations, + NumComponentLists, NumComponents) {} /// \brief Build an empty clause. /// - /// \param N Number of variables. + /// \param NumVars Number of expressions listed in this clause. + /// \param NumUniqueDeclarations Number of unique base declarations in this + /// clause. + /// \param NumComponentLists Number of component lists in this clause. + /// \param NumComponents Total number of expression components in the clause. /// - explicit OMPUseDevicePtrClause(unsigned N) - : OMPVarListClause( - OMPC_use_device_ptr, SourceLocation(), SourceLocation(), - SourceLocation(), N) {} + explicit OMPUseDevicePtrClause(unsigned NumVars, + unsigned NumUniqueDeclarations, + unsigned NumComponentLists, + unsigned NumComponents) + : OMPMappableExprListClause(OMPC_use_device_ptr, SourceLocation(), + SourceLocation(), SourceLocation(), NumVars, + NumUniqueDeclarations, NumComponentLists, + NumComponents) {} + + /// \brief Sets the list of references to private copies with initializers for + /// new private variables. + /// \param VL List of references. + void setPrivateCopies(ArrayRef VL); + + /// \brief Gets the list of references to private copies with initializers for + /// new private variables. + MutableArrayRef getPrivateCopies() { + return MutableArrayRef(varlist_end(), varlist_size()); + } + ArrayRef getPrivateCopies() const { + return llvm::makeArrayRef(varlist_end(), varlist_size()); + } + + /// \brief Sets the list of references to initializer variables for new + /// private variables. + /// \param VL List of references. + void setInits(ArrayRef VL); + + /// \brief Gets the list of references to initializer variables for new + /// private variables. + MutableArrayRef getInits() { + return MutableArrayRef(getPrivateCopies().end(), varlist_size()); + } + ArrayRef getInits() const { + return llvm::makeArrayRef(getPrivateCopies().end(), varlist_size()); + } public: - /// Creates clause with a list of variables \a VL. + /// \brief Creates clause with a list of variables \a Vars. /// /// \param C AST context. /// \param StartLoc Starting location of the clause. - /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - /// \param VL List of references to the variables. + /// \param Vars The original expression used in the clause. + /// \param PrivateVars Expressions referring to private copies. + /// \param Inits Expressions referring to private copy initializers. + /// \param Declarations Declarations used in the clause. + /// \param ComponentLists Component lists used in the clause. /// static OMPUseDevicePtrClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc, ArrayRef VL); - /// Creates an empty clause with the place for \a N variables. + SourceLocation EndLoc, ArrayRef Vars, + ArrayRef PrivateVars, ArrayRef Inits, + ArrayRef Declarations, + MappableExprComponentListsRef ComponentLists); + + /// \brief Creates an empty clause with the place for \a NumVars variables. /// /// \param C AST context. - /// \param N The number of variables. + /// \param NumVars Number of expressions listed in the clause. + /// \param NumUniqueDeclarations Number of unique base declarations in this + /// clause. + /// \param NumComponentLists Number of unique base declarations in this + /// clause. + /// \param NumComponents Total number of expression components in the clause. /// - static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C, unsigned N); + static OMPUseDevicePtrClause *CreateEmpty(const ASTContext &C, + unsigned NumVars, + unsigned NumUniqueDeclarations, + unsigned NumComponentLists, + unsigned NumComponents); + + typedef MutableArrayRef::iterator private_copies_iterator; + typedef ArrayRef::iterator private_copies_const_iterator; + typedef llvm::iterator_range private_copies_range; + typedef llvm::iterator_range + private_copies_const_range; + + private_copies_range private_copies() { + return private_copies_range(getPrivateCopies().begin(), + getPrivateCopies().end()); + } + private_copies_const_range private_copies() const { + return private_copies_const_range(getPrivateCopies().begin(), + getPrivateCopies().end()); + } + + typedef MutableArrayRef::iterator inits_iterator; + typedef ArrayRef::iterator inits_const_iterator; + typedef llvm::iterator_range inits_range; + typedef llvm::iterator_range inits_const_range; + + inits_range inits() { + return inits_range(getInits().begin(), getInits().end()); + } + inits_const_range inits() const { + return inits_const_range(getInits().begin(), getInits().end()); + } child_range children() { return child_range(reinterpret_cast(varlist_begin()), Index: lib/AST/OpenMPClause.cpp =================================================================== --- lib/AST/OpenMPClause.cpp +++ lib/AST/OpenMPClause.cpp @@ -732,22 +732,66 @@ NumComponentLists, NumComponents); } -OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(const ASTContext &C, - SourceLocation StartLoc, - SourceLocation LParenLoc, - SourceLocation EndLoc, - ArrayRef VL) { - void *Mem = C.Allocate(totalSizeToAlloc(VL.size())); - OMPUseDevicePtrClause *Clause = - new (Mem) OMPUseDevicePtrClause(StartLoc, LParenLoc, EndLoc, VL.size()); - Clause->setVarRefs(VL); +void OMPUseDevicePtrClause::setPrivateCopies(ArrayRef VL) { + assert(VL.size() == varlist_size() && + "Number of private copies is not the same as the preallocated buffer"); + std::copy(VL.begin(), VL.end(), varlist_end()); +} + +void OMPUseDevicePtrClause::setInits(ArrayRef VL) { + assert(VL.size() == varlist_size() && + "Number of inits is not the same as the preallocated buffer"); + std::copy(VL.begin(), VL.end(), getPrivateCopies().end()); +} + +OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc, ArrayRef Vars, ArrayRef PrivateVars, + ArrayRef Inits, ArrayRef Declarations, + MappableExprComponentListsRef ComponentLists) { + unsigned NumVars = Vars.size(); + unsigned NumUniqueDeclarations = + getUniqueDeclarationsTotalNumber(Declarations); + unsigned NumComponentLists = ComponentLists.size(); + unsigned NumComponents = getComponentsTotalNumber(ComponentLists); + + // We need to allocate: + // 3 x NumVars x Expr* - we have an original list expression for each clause + // list entry and an equal number of private copies and inits. + // NumUniqueDeclarations x ValueDecl* - unique base declarations associated + // with each component list. + // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the + // number of lists for each unique declaration and the size of each component + // list. + // NumComponents x MappableComponent - the total of all the components in all + // the lists. + void *Mem = C.Allocate( + totalSizeToAlloc( + 3 * NumVars, NumUniqueDeclarations, + NumUniqueDeclarations + NumComponentLists, NumComponents)); + + OMPUseDevicePtrClause *Clause = new (Mem) OMPUseDevicePtrClause( + StartLoc, LParenLoc, EndLoc, NumVars, NumUniqueDeclarations, + NumComponentLists, NumComponents); + + Clause->setVarRefs(Vars); + Clause->setPrivateCopies(PrivateVars); + Clause->setInits(Inits); + Clause->setClauseInfo(Declarations, ComponentLists); return Clause; } -OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty(const ASTContext &C, - unsigned N) { - void *Mem = C.Allocate(totalSizeToAlloc(N)); - return new (Mem) OMPUseDevicePtrClause(N); +OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty( + const ASTContext &C, unsigned NumVars, unsigned NumUniqueDeclarations, + unsigned NumComponentLists, unsigned NumComponents) { + void *Mem = C.Allocate( + totalSizeToAlloc( + 3 * NumVars, NumUniqueDeclarations, + NumUniqueDeclarations + NumComponentLists, NumComponents)); + return new (Mem) OMPUseDevicePtrClause(NumVars, NumUniqueDeclarations, + NumComponentLists, NumComponents); } OMPIsDevicePtrClause *OMPIsDevicePtrClause::Create(const ASTContext &C, Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -997,17 +997,62 @@ virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc); + /// Struct that keeps all the relevant information that should be kept + /// throughout a 'target data' region. + class TargetDataInfo { + /// Set to true if device pointer information have to be obtained. + bool RequiresDevicePointerInfo = false; + + public: + /// The array of base pointer passed to the runtime library. + llvm::Value *BasePointersArray = nullptr; + /// The array of section pointers passed to the runtime library. + llvm::Value *PointersArray = nullptr; + /// The array of sizes passed to the runtime library. + llvm::Value *SizesArray = nullptr; + /// The array of map types passed to the runtime library. + llvm::Value *MapTypesArray = nullptr; + /// The total number of pointers passed to the runtime library. + unsigned NumberOfPtrs = 0u; + /// Map between the a declaration of a capture and the corresponding base + /// pointer address where the runtime returns the device pointers. + llvm::DenseMap CaptureDeviceAddrMap; + + explicit TargetDataInfo() {} + explicit TargetDataInfo(bool RequiresDevicePointerInfo) + : RequiresDevicePointerInfo(RequiresDevicePointerInfo) {} + /// Clear information about the data arrays. + void clearArrayInfo() { + BasePointersArray = nullptr; + PointersArray = nullptr; + SizesArray = nullptr; + MapTypesArray = nullptr; + NumberOfPtrs = 0u; + } + /// Return true if the current target data information has valid arrays. + bool isValid() { + return BasePointersArray && PointersArray && SizesArray && + MapTypesArray && NumberOfPtrs; + } + bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } + }; + /// \brief Emit the target data mapping code associated with \a D. /// \param D Directive to emit. - /// \param IfCond Expression evaluated in if clause associated with the target - /// directive, or null if no if clause is used. + /// \param IfCond Expression evaluated in if clause associated with the + /// target directive, or null if no device clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - /// \param CodeGen Function that emits the enclosed region. + /// \param PvtCodeGen Emitter with device pointer privatization support. + /// \param NoPvtCodeGen Emitter with no device pointer privatization support.. + /// \param Info A record used to store information that needs to be preserved + /// until the region is closed. virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, - const RegionCodeGenTy &CodeGen); + const RegionCodeGenTy &PvtCodeGen, + const RegionCodeGenTy &NoPvtCodeGen, + TargetDataInfo &Info); /// \brief Emit the data mapping/movement code associated with the directive /// \a D that should be of the form 'target [{enter|exit} data | update]'. Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -4986,8 +4986,29 @@ OMP_MAP_PRIVATE_PTR = 0x80, /// \brief Pass the element to the device by value. OMP_MAP_PRIVATE_VAL = 0x100, + /// \brief Signal that the runtime library has to return the device pointer + /// in the current position for the data being mapped. + OMP_MAP_RETURN_PTR = 0x200, }; + /// Class that associates information with a base pointer to be passed to the + /// runtime library. + class BasePointerInfo { + /// The base pointer. + llvm::Value *Ptr = nullptr; + /// The base declaration that refers to this device pointer, or null if + /// there is none. + const ValueDecl *DevPtrDecl = nullptr; + + public: + BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) + : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} + llvm::Value *operator*() const { return Ptr; } + const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } + void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } + }; + + typedef SmallVector MapBaseValuesArrayTy; typedef SmallVector MapValuesArrayTy; typedef SmallVector MapFlagsArrayTy; @@ -5129,7 +5150,7 @@ void generateInfoForComponentList( OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, bool IsFirstComponentList) const { @@ -5380,8 +5401,10 @@ } /// \brief Generate all the base pointers, section pointers, sizes and map - /// types for the extracted mappable expressions. - void generateAllInfo(MapValuesArrayTy &BasePointers, + /// types for the extracted mappable expressions. Also, for each item that + /// relates with a device pointer, a pair of the relevant declaration and + /// index where it occurs is appended to the device pointers info array. + void generateAllInfo(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { BasePointers.clear(); @@ -5390,9 +5413,26 @@ Types.clear(); struct MapInfo { + /// Kind that defines how a device pointer has to be returned. + enum ReturnPointerKind { + // Don't have to return any pointer. + RPK_None, + // Pointer is the base of the declaration. + RPK_Base, + // Pointer is a member of the base declaration - 'this' + RPK_Member, + }; OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - OpenMPMapClauseKind MapTypeModifier; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + ReturnPointerKind ReturnDevicePointer = RPK_None; + MapInfo( + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + ReturnPointerKind ReturnDevicePointer) + : Components(Components), MapType(MapType), + MapTypeModifier(MapTypeModifier), + ReturnDevicePointer(ReturnDevicePointer) {} }; // We have to process the component lists that relate with the same @@ -5402,14 +5442,15 @@ // Helper function to fill the information map for the different supported // clauses. - auto &&InfoGen = - [&Info](const ValueDecl *D, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) { - const ValueDecl *VD = - D ? cast(D->getCanonicalDecl()) : nullptr; - Info[VD].push_back({L, MapType, MapModifier}); - }; + auto &&InfoGen = [&Info]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, + MapInfo::ReturnPointerKind ReturnDevicePointer = MapInfo::RPK_None) { + const ValueDecl *VD = + D ? cast(D->getCanonicalDecl()) : nullptr; + Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); + }; for (auto *C : Directive.getClausesOfKind()) for (auto L : C->component_lists()) @@ -5421,6 +5462,48 @@ for (auto L : C->component_lists()) InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown); + // Look at the use_device_ptr clause information and mark the existing map + // entries as such. If there is no map information for an entry in the + // use_device_ptr list, we create one with map type 'alloc' and zero size + // section. It is the user fault if that was not mapped before. + for (auto *C : Directive.getClausesOfKind()) + for (auto L : C->component_lists()) { + assert(!L.second.empty() && "Not expecting empty list of components!"); + const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); + VD = cast(VD->getCanonicalDecl()); + auto *IE = L.second.back().getAssociatedExpression(); + // If the first component is a member expression, we have to look into + // 'this', which maps to null in the map of map information. Otherwise + // look directly for the information. + auto It = Info.find(isa(IE) ? nullptr : VD); + + // We potentially have map information for this declaration already. + // Look for the first set of components that refer to it. + if (It != Info.end()) { + auto CI = std::find_if( + It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { + return MI.Components.back().getAssociatedDeclaration() == VD; + }); + // If we found a map entry, signal that the pointer has to be returned + // and move on to the next declaration. + if (CI != It->second.end()) { + CI->ReturnDevicePointer = + isa(IE) ? MapInfo::RPK_Member : MapInfo::RPK_Base; + continue; + } + } + + // We didn't find any match in our map information - generate a zero + // size array section. + llvm::Value *Ptr = + CGF.EmitLoadOfLValue(CGF.EmitLValue(IE), SourceLocation()) + .getScalarVal(); + BasePointers.push_back({Ptr, VD}); + Pointers.push_back(Ptr); + Sizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); + Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); + } + for (auto &M : Info) { // We need to know when we generate information for the first component // associated with a capture, because the mapping flags depend on it. @@ -5428,9 +5511,31 @@ for (MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); + + // Remember the current base pointer index. + unsigned CurrentBasePointersIdx = BasePointers.size(); generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, Sizes, Types, IsFirstComponentList); + + // If this entry relates with a device pointer, set the relevant + // declaration and add the 'return pointer' flag. + if (IsFirstComponentList && + L.ReturnDevicePointer != MapInfo::RPK_None) { + // If the pointer is not the base of the map, we need to skip the + // base. + if (L.ReturnDevicePointer != MapInfo::RPK_Base) + ++CurrentBasePointersIdx; + assert(BasePointers.size() > CurrentBasePointersIdx && + "Unexpected number of mapped base pointers."); + + auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); + assert(RelevantVD && + "No relevant declaration related with device pointer??"); + + BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); + Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; + } IsFirstComponentList = false; } } @@ -5439,7 +5544,7 @@ /// \brief Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. void generateInfoForCapture(const CapturedStmt::Capture *Cap, - MapValuesArrayTy &BasePointers, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { @@ -5476,12 +5581,12 @@ /// \brief Generate the default map information for a given capture \a CI, /// record field declaration \a RI and captured value \a CV. - void generateDefaultMapInfo( - const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, - MappableExprsHandler::MapValuesArrayTy &CurBasePointers, - MappableExprsHandler::MapValuesArrayTy &CurPointers, - MappableExprsHandler::MapValuesArrayTy &CurSizes, - MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) { + void generateDefaultMapInfo(const CapturedStmt::Capture &CI, + const FieldDecl &RI, llvm::Value *CV, + MapBaseValuesArrayTy &CurBasePointers, + MapValuesArrayTy &CurPointers, + MapValuesArrayTy &CurSizes, + MapFlagsArrayTy &CurMapTypes) { // Do the default mapping. if (CI.capturesThis()) { @@ -5490,15 +5595,14 @@ const PointerType *PtrTy = cast(RI.getType().getTypePtr()); CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); // Default map type. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM); + CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); } else if (CI.capturesVariableByCopy()) { CurBasePointers.push_back(CV); CurPointers.push_back(CV); if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL); + CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); CurSizes.push_back(CGF.getTypeSize(RI.getType())); } else { // Pointers are implicitly mapped with a zero size and no flags @@ -5519,9 +5623,8 @@ // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. CurMapTypes.push_back(ElementType->isAggregateType() - ? (MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM) - : MappableExprsHandler::OMP_MAP_TO); + ? (OMP_MAP_TO | OMP_MAP_FROM) + : OMP_MAP_TO); // If we have a capture by reference we may need to add the private // pointer flag if the base declaration shows in some first-private @@ -5531,7 +5634,7 @@ } // Every default map produces a single argument, so, it is always the // first one. - CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF; + CurMapTypes.back() |= OMP_MAP_FIRST_REF; } }; @@ -5546,19 +5649,20 @@ /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. static void -emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray, - llvm::Value *&PointersArray, llvm::Value *&SizesArray, - llvm::Value *&MapTypesArray, - MappableExprsHandler::MapValuesArrayTy &BasePointers, +emitOffloadingArrays(CodeGenFunction &CGF, + MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, - MappableExprsHandler::MapFlagsArrayTy &MapTypes) { + MappableExprsHandler::MapFlagsArrayTy &MapTypes, + CGOpenMPRuntime::TargetDataInfo &Info) { auto &CGM = CGF.CGM; auto &Ctx = CGF.getContext(); - BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr; + // Reset the array information. + Info.clearArrayInfo(); + Info.NumberOfPtrs = BasePointers.size(); - if (unsigned PointerNumVal = BasePointers.size()) { + if (Info.NumberOfPtrs) { // Detect if we have any capture size requiring runtime evaluation of the // size so that a constant array could be eventually used. bool hasRuntimeEvaluationCaptureSize = false; @@ -5568,14 +5672,14 @@ break; } - llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); + llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); QualType PointerArrayType = Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); - BasePointersArray = + Info.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - PointersArray = + Info.PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); // If we don't have any VLA types or other types that require runtime @@ -5585,7 +5689,7 @@ QualType SizeArrayType = Ctx.getConstantArrayType( Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); - SizesArray = + Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { // We expect all the sizes to be constant, so we collect them to create @@ -5601,7 +5705,7 @@ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, SizesArrayInit, ".offload_sizes"); SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - SizesArray = SizesArrayGbl; + Info.SizesArray = SizesArrayGbl; } // The map types are always constant so we don't need to generate code to @@ -5613,10 +5717,10 @@ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapTypesArrayInit, ".offload_maptypes"); MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - MapTypesArray = MapTypesArrayGbl; + Info.MapTypesArray = MapTypesArrayGbl; - for (unsigned i = 0; i < PointerNumVal; ++i) { - llvm::Value *BPVal = BasePointers[i]; + for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { + llvm::Value *BPVal = *BasePointers[i]; if (BPVal->getType()->isPointerTy()) BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); else { @@ -5625,11 +5729,15 @@ BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, - 0, i); + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.BasePointersArray, 0, i); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); + if (Info.requiresDevicePointerInfo()) + if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) + Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); + llvm::Value *PVal = Pointers[i]; if (PVal->getType()->isPointerTy()) PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); @@ -5639,14 +5747,15 @@ PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, - i); + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.PointersArray, 0, i); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); if (hasRuntimeEvaluationCaptureSize) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, + llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), + Info.SizesArray, /*Idx0=*/0, /*Idx1=*/i); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); @@ -5662,23 +5771,24 @@ static void emitOffloadingArraysArgument( CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, - llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, - llvm::Value *PointersArray, llvm::Value *SizesArray, - llvm::Value *MapTypesArray, unsigned NumElems) { + llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { auto &CGM = CGF.CGM; - if (NumElems) { + if (Info.NumberOfPtrs) { BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.BasePointersArray, /*Idx0=*/0, /*Idx1=*/0); PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.PointersArray, /*Idx0=*/0, /*Idx1=*/0); SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, + llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, + llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), + Info.MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); } else { @@ -5705,12 +5815,12 @@ // Fill up the arrays with all the captured variables. MappableExprsHandler::MapValuesArrayTy KernelArgs; - MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; - MappableExprsHandler::MapValuesArrayTy CurBasePointers; + MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; MappableExprsHandler::MapValuesArrayTy CurPointers; MappableExprsHandler::MapValuesArrayTy CurSizes; MappableExprsHandler::MapFlagsArrayTy CurMapTypes; @@ -5759,7 +5869,7 @@ // The kernel args are always the first elements of the base pointers // associated with a capture. - KernelArgs.push_back(CurBasePointers.front()); + KernelArgs.push_back(*CurBasePointers.front()); // We need to append the results of this capture to what we already have. BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); Pointers.append(CurPointers.begin(), CurPointers.end()); @@ -5782,17 +5892,11 @@ &D](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); // Emit the offloading arrays. - llvm::Value *BasePointersArray; - llvm::Value *PointersArray; - llvm::Value *SizesArray; - llvm::Value *MapTypesArray; - emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, - MapTypesArray, BasePointers, Pointers, Sizes, - MapTypes); - emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray, - SizesArray, MapTypesArray, BasePointersArray, - PointersArray, SizesArray, MapTypesArray, - BasePointers.size()); + TargetDataInfo Info; + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host @@ -5833,15 +5937,19 @@ assert(ThreadLimit && "Thread limit expression should be available along " "with number of teams."); llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, - BasePointersArray, PointersArray, SizesArray, - MapTypesArray, NumTeams, ThreadLimit}; + DeviceID, OutlinedFnID, + PointerNum, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, NumTeams, + ThreadLimit}; Return = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; + DeviceID, OutlinedFnID, + PointerNum, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray}; Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), OffloadingArgs); } @@ -6053,29 +6161,20 @@ PushNumTeamsArgs); } -void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - const Expr *IfCond, - const Expr *Device, - const RegionCodeGenTy &CodeGen) { - +void CGOpenMPRuntime::emitTargetDataCalls( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device, const RegionCodeGenTy &PvtCodeGen, + const RegionCodeGenTy &NoPvtCodeGen, TargetDataInfo &Info) { if (!CGF.HaveInsertPoint()) return; - llvm::Value *BasePointersArray = nullptr; - llvm::Value *PointersArray = nullptr; - llvm::Value *SizesArray = nullptr; - llvm::Value *MapTypesArray = nullptr; - unsigned NumOfPtrs = 0; - // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. - auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray, - &SizesArray, &MapTypesArray, Device, - &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&BeginThenGen = [&D, &CGF, Device, &Info, + &PvtCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; @@ -6083,21 +6182,16 @@ // Get map clause information. MappableExprsHandler MCHandler(D, CGF); MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); - NumOfPtrs = BasePointers.size(); // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, - MapTypesArray, BasePointers, Pointers, Sizes, - MapTypes); + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr; llvm::Value *SizesArrayArg = nullptr; llvm::Value *MapTypesArrayArg = nullptr; emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, - BasePointersArray, PointersArray, SizesArray, - MapTypesArray, NumOfPtrs); + SizesArrayArg, MapTypesArrayArg, Info); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -6108,7 +6202,7 @@ DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, @@ -6116,23 +6210,26 @@ auto &RT = CGF.CGM.getOpenMPRuntime(); CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), OffloadingArgs); + + // If device pointer privatization is required, emit the body of the region + // here. It will have to be duplicated: with and without privatization. + if (!Info.CaptureDeviceAddrMap.empty()) { + RegionCodeGenTy RCG(PvtCodeGen); + RCG(CGF); + } }; // Generate code for the closing of the data region. - auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray, - &MapTypesArray, Device, - &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { - assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray && - NumOfPtrs && "Invalid data environment closing arguments."); + auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF, + PrePostActionTy &) { + assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr; llvm::Value *SizesArrayArg = nullptr; llvm::Value *MapTypesArrayArg = nullptr; emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, - BasePointersArray, PointersArray, SizesArray, - MapTypesArray, NumOfPtrs); + SizesArrayArg, MapTypesArrayArg, Info); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -6143,7 +6240,7 @@ DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, @@ -6153,24 +6250,40 @@ OffloadingArgs); }; - // In the event we get an if clause, we don't have to take any action on the - // else side. - auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + // If we need device pointer privatization, we need to emit the body of the + // region with no privatization in the 'else' branch of the conditional. + // Otherwise, we don't have to do anything. + auto &&BeginElseGen = [&Info, &NoPvtCodeGen](CodeGenFunction &CGF, + PrePostActionTy &) { + if (!Info.CaptureDeviceAddrMap.empty()) { + RegionCodeGenTy RCG(NoPvtCodeGen); + RCG(CGF); + } + }; + + // We don't have to do anything to close the region if the if clause evaluates + // to false. + auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; if (IfCond) { - emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); + emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); } else { - RegionCodeGenTy BeginThenRCG(BeginThenGen); - BeginThenRCG(CGF); + RegionCodeGenTy RCG(BeginThenGen); + RCG(CGF); } - CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); + // If we don't require privatization of device pointers, we emit the body in + // between the runtime calls. This avoids duplicating the body code. + if (Info.CaptureDeviceAddrMap.empty()) { + RegionCodeGenTy RCG(NoPvtCodeGen); + RCG(CGF); + } if (IfCond) { - emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); + emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); } else { - RegionCodeGenTy EndThenRCG(EndThenGen); - EndThenRCG(CGF); + RegionCodeGenTy RCG(EndThenGen); + RCG(CGF); } } @@ -6188,7 +6301,7 @@ // Generate the code for the opening of the data environment. auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; @@ -6197,19 +6310,12 @@ MappableExprsHandler MEHandler(D, CGF); MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); - llvm::Value *BasePointersArrayArg = nullptr; - llvm::Value *PointersArrayArg = nullptr; - llvm::Value *SizesArrayArg = nullptr; - llvm::Value *MapTypesArrayArg = nullptr; - // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, BasePointers, - Pointers, Sizes, MapTypes); - emitOffloadingArraysArgument( - CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, - MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, - MapTypesArrayArg, BasePointers.size()); + TargetDataInfo Info; + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -6223,8 +6329,8 @@ auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); llvm::Value *OffloadingArgs[] = { - DeviceID, PointerNum, BasePointersArrayArg, - PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + DeviceID, PointerNum, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -3400,22 +3400,115 @@ return BreakContinueStack.back().BreakBlock; } +void CodeGenFunction::EmitOMPUseDevicePtrClause( + const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope, + llvm::DenseMap &CaptureDeviceAddrMap) { + if (!HaveInsertPoint()) + return; + + llvm::DenseSet EmittedAsFirstprivate; + CGCapturedStmtInfo CapturesInfo(cast(*D.getAssociatedStmt())); + for (const auto *C : D.getClausesOfKind()) { + auto OrigVarIt = C->varlist_begin(); + auto InitIt = C->inits().begin(); + for (auto PvtVarIt : C->private_copies()) { + auto *OrigVD = cast(cast(*OrigVarIt)->getDecl()); + auto *InitVD = cast(cast(*InitIt)->getDecl()); + auto *PvtVD = cast(cast(PvtVarIt)->getDecl()); + + // In order to identify the right initializer we need to match the + // declaration used by the mapping logic. In some cases we may get + // OMPCapturedExprDecl that refers to the original declaration. + const ValueDecl *MatchingVD = OrigVD; + if (auto *OED = dyn_cast(MatchingVD)) { + // OMPCapturedExprDecl are used to privative fields of the current + // structure. + auto *ME = cast(OED->getInit()); + assert(isa(ME->getBase()) && + "Base should be the current struct!"); + MatchingVD = ME->getMemberDecl(); + } + + // If we don't have information about the current list item, move on to + // the next one. + auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); + if (InitAddrIt == CaptureDeviceAddrMap.end()) + continue; + + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + // Initialize the temporary initialization variable with the address we + // get from the runtime library. We have to cast the source address + // because it is always a void *. References are materialized in the + // privatization scope, so the initialization here disregards the fact + // the original variable is a reference. + QualType AddrQTy = getContext().getPointerType( + OrigVD->getType().getNonReferenceType()); + llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); + Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); + setAddrOfLocalVar(InitVD, InitAddr); + + // Emit private declaration, it will be initialized by the value we + // declaration we just added to the local declarations map. + EmitDecl(*PvtVD); + + // The initialization variables reached its purpose in the emission + // ofthe previous declaration, so we don't need it anymore. + LocalDeclMap.erase(InitVD); + + // Return the address of the private variable. + return GetAddrOfLocalVar(PvtVD); + }); + assert(IsRegistered && "firstprivate var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + ++OrigVarIt; + ++InitIt; + } + } +} + // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { - // The target data enclosed region is implemented just by emitting the - // statement. - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); + + // Codegen with device pointer privatization. + auto &&PvtCodeGen = [&S, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPUseDevicePtrClause(S, PrivateScope, Info.CaptureDeviceAddrMap); + (void)PrivateScope.Privatize(); + CGF.EmitStmt( + cast(S.getAssociatedStmt())->getCapturedStmt()); + }; + // Notwithstanding the body of the region is emitted as inlined directive, + // we don't use an inline scope as changes in the references inside the + // region are expected to be visible outside, so we do not privative them. + OMPLexicalScope Scope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, + CodeGen); + }; + + // Codegen with no device pointer privatization. + auto &&NoPvtCodeGen = [&S, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&CodeGen = [&S, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast(S.getAssociatedStmt())->getCapturedStmt()); + }; + // Notwithstanding the body of the region is emitted as inlined directive, + // we don't use an inline scope as changes in the references inside the + // region are expected to be visible outside, so we do not privative them. + OMPLexicalScope Scope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, + CodeGen); }; // If we don't have target devices, don't bother emitting the data mapping // code. if (CGM.getLangOpts().OMPTargetTriples.empty()) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data, - CodeGen); + RegionCodeGenTy RCG(NoPvtCodeGen); + RCG(*this); return; } @@ -3429,7 +3522,9 @@ if (auto *C = S.getSingleClause()) Device = C->getDevice(); - CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen); + // Emit region code. + CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, + PvtCodeGen, NoPvtCodeGen, Info); } void CodeGenFunction::EmitOMPTargetEnterDataDirective( Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2392,6 +2392,9 @@ OMPPrivateScope &PrivateScope); void EmitOMPPrivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); + void EmitOMPUseDevicePtrClause( + const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope, + llvm::DenseMap &CaptureDeviceAddrMap); /// \brief Emit code for copyin clause in \a D directive. The next code is /// generated at the start of outlined functions for directives: /// \code Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -11792,7 +11792,10 @@ SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { - SmallVector Vars; + MappableVarListInfo MVLI(VarList); + SmallVector PrivateCopies; + SmallVector Inits; + for (auto &RefExpr : VarList) { assert(RefExpr && "NULL expr in OpenMP use_device_ptr clause."); SourceLocation ELoc; @@ -11801,27 +11804,73 @@ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange); if (Res.second) { // It will be analyzed later. - Vars.push_back(RefExpr); + MVLI.ProcessedVarList.push_back(RefExpr); + PrivateCopies.push_back(nullptr); + Inits.push_back(nullptr); } ValueDecl *D = Res.first; if (!D) continue; QualType Type = D->getType(); - // item should be a pointer or reference to pointer - if (!Type.getNonReferenceType()->isPointerType()) { + Type = Type.getNonReferenceType().getUnqualifiedType(); + + auto *VD = dyn_cast(D); + + // Item should be a pointer or reference to pointer. + if (!Type->isPointerType()) { Diag(ELoc, diag::err_omp_usedeviceptr_not_a_pointer) << 0 << RefExpr->getSourceRange(); continue; } - Vars.push_back(RefExpr->IgnoreParens()); + + // Build the private variable and the expression that refers to it. + auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr); + if (VDPrivate->isInvalidDecl()) + continue; + + CurContext->addDecl(VDPrivate); + auto VDPrivateRefExpr = buildDeclRefExpr( + *this, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc); + + // Add temporary variable to initialize the private copy of the pointer. + auto *VDInit = + buildVarDecl(*this, RefExpr->getExprLoc(), Type, ".devptr.temp"); + auto *VDInitRefExpr = buildDeclRefExpr(*this, VDInit, RefExpr->getType(), + RefExpr->getExprLoc()); + AddInitializerToDecl(VDPrivate, + DefaultLvalueConversion(VDInitRefExpr).get(), + /*DirectInit=*/false, /*TypeMayContainAuto=*/false); + + // If required, build a capture to implement the privatization initialized + // with the current list item value. + DeclRefExpr *Ref = nullptr; + if (!VD) + Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true); + MVLI.ProcessedVarList.push_back(VD ? RefExpr->IgnoreParens() : Ref); + PrivateCopies.push_back(VDPrivateRefExpr); + Inits.push_back(VDInitRefExpr); + + // We need to add a data sharing attribute for this variable to make sure it + // is correctly captured. A variable that shows up in a use_device_ptr has + // similar properties of a first private variable. + DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref); + + // Create a mappable component for the list item. List items in this clause + // only need a component. + MVLI.VarBaseDeclarations.push_back(D); + MVLI.VarComponents.resize(MVLI.VarComponents.size() + 1); + MVLI.VarComponents.back().push_back( + OMPClauseMappableExprCommon::MappableComponent(SimpleRefExpr, D)); } - if (Vars.empty()) + if (MVLI.ProcessedVarList.empty()) return nullptr; - return OMPUseDevicePtrClause::Create(Context, StartLoc, LParenLoc, EndLoc, - Vars); + return OMPUseDevicePtrClause::Create( + Context, StartLoc, LParenLoc, EndLoc, MVLI.ProcessedVarList, + PrivateCopies, Inits, MVLI.VarBaseDeclarations, MVLI.VarComponents); } OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef VarList, Index: lib/Serialization/ASTReaderStmt.cpp =================================================================== --- lib/Serialization/ASTReaderStmt.cpp +++ lib/Serialization/ASTReaderStmt.cpp @@ -1932,9 +1932,15 @@ NumComponents); break; } - case OMPC_use_device_ptr: - C = OMPUseDevicePtrClause::CreateEmpty(Context, Record[Idx++]); + case OMPC_use_device_ptr: { + unsigned NumVars = Record[Idx++]; + unsigned NumDeclarations = Record[Idx++]; + unsigned NumLists = Record[Idx++]; + unsigned NumComponents = Record[Idx++]; + C = OMPUseDevicePtrClause::CreateEmpty(Context, NumVars, NumDeclarations, + NumLists, NumComponents); break; + } case OMPC_is_device_ptr: C = OMPIsDevicePtrClause::CreateEmpty(Context, Record[Idx++]); break; @@ -2457,13 +2463,54 @@ void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx)); - unsigned NumVars = C->varlist_size(); + auto NumVars = C->varlist_size(); + auto UniqueDecls = C->getUniqueDeclarationsNum(); + auto TotalLists = C->getTotalComponentListNum(); + auto TotalComponents = C->getTotalComponentsNum(); + SmallVector Vars; Vars.reserve(NumVars); for (unsigned i = 0; i != NumVars; ++i) Vars.push_back(Reader->Reader.ReadSubExpr()); C->setVarRefs(Vars); Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Reader->Reader.ReadSubExpr()); + C->setPrivateCopies(Vars); + Vars.clear(); + for (unsigned i = 0; i != NumVars; ++i) + Vars.push_back(Reader->Reader.ReadSubExpr()); + C->setInits(Vars); + + SmallVector Decls; + Decls.reserve(UniqueDecls); + for (unsigned i = 0; i < UniqueDecls; ++i) + Decls.push_back( + Reader->Reader.ReadDeclAs(Reader->F, Record, Idx)); + C->setUniqueDecls(Decls); + + SmallVector ListsPerDecl; + ListsPerDecl.reserve(UniqueDecls); + for (unsigned i = 0; i < UniqueDecls; ++i) + ListsPerDecl.push_back(Record[Idx++]); + C->setDeclNumLists(ListsPerDecl); + + SmallVector ListSizes; + ListSizes.reserve(TotalLists); + for (unsigned i = 0; i < TotalLists; ++i) + ListSizes.push_back(Record[Idx++]); + C->setComponentListSizes(ListSizes); + + SmallVector Components; + Components.reserve(TotalComponents); + for (unsigned i = 0; i < TotalComponents; ++i) { + Expr *AssociatedExpr = Reader->Reader.ReadSubExpr(); + ValueDecl *AssociatedDecl = + Reader->Reader.ReadDeclAs(Reader->F, Record, Idx); + Components.push_back(OMPClauseMappableExprCommon::MappableComponent( + AssociatedExpr, AssociatedDecl)); + } + C->setComponents(Components, ListSizes); } void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) { Index: lib/Serialization/ASTWriterStmt.cpp =================================================================== --- lib/Serialization/ASTWriterStmt.cpp +++ lib/Serialization/ASTWriterStmt.cpp @@ -2151,9 +2151,25 @@ void OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { Record.push_back(C->varlist_size()); + Record.push_back(C->getUniqueDeclarationsNum()); + Record.push_back(C->getTotalComponentListNum()); + Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); - for (auto *VE : C->varlists()) { + for (auto *E : C->varlists()) + Record.AddStmt(E); + for (auto *VE : C->private_copies()) Record.AddStmt(VE); + for (auto *VE : C->inits()) + Record.AddStmt(VE); + for (auto *D : C->all_decls()) + Record.AddDeclRef(D); + for (auto N : C->all_num_lists()) + Record.push_back(N); + for (auto N : C->all_lists_sizes()) + Record.push_back(N); + for (auto &M : C->all_components()) { + Record.AddStmt(M.getAssociatedExpression()); + Record.AddDeclRef(M.getAssociatedDeclaration()); } } Index: test/OpenMP/target_data_use_device_ptr_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/target_data_use_device_ptr_codegen.cpp @@ -0,0 +1,464 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +///==========================================================================/// +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +double *g; + +// CK1: @g = global double* +// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i32] [i32 547] +// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i32] [{{i32 35, i32 547|i32 547, i32 35}}] +// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i32] [i32 547, i32 547] +// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i32] [i32 547, i32 547] +// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i32] [i32 544, i32 35] +// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i32] [i32 544, i32 35] + +// CK1-LABEL: @_Z3foo +template +void foo(float *&lr, T *&tr) { + float *l; + T *t; + + // CK1-DAG: [[RVAL:%.+]] = bitcast double* [[T:%.+]] to i8* + // CK1-DAG: [[T]] = load double*, double** [[DECL:@g]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to double** + // CK1: [[VAL:%.+]] = load double*, double** [[CBP]], + // CK1-NOT: store double* [[VAL]], double** [[DECL]], + // CK1: store double* [[VAL]], double** [[PVT:%.+]], + // CK1: [[TT:%.+]] = load double*, double** [[PVT]], + // CK1: getelementptr inbounds double, double* [[TT]], i32 1 + #pragma omp target data map(g[:10]) use_device_ptr(g) + { + ++g; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE00]] + // CK1: [[TTT:%.+]] = load double*, double** [[DECL]], + // CK1: getelementptr inbounds double, double* [[TTT]], i32 1 + ++g; + + // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** + // CK1: [[VAL:%.+]] = load float*, float** [[CBP]], + // CK1-NOT: store float* [[VAL]], float** [[DECL]], + // CK1: store float* [[VAL]], float** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load float*, float** [[PVT]], + // CK1: getelementptr inbounds float, float* [[TT1]], i32 1 + #pragma omp target data map(l[:10]) use_device_ptr(l) + { + ++l; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE01]] + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + ++l; + + // CK1-NOT: call void @__tgt_target + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + #pragma omp target data map(l[:10]) use_device_ptr(l) if(0) + { + ++l; + } + // CK1-NOT: call void @__tgt_target + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + ++l; + + // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** + // CK1: [[VAL:%.+]] = load float*, float** [[CBP]], + // CK1-NOT: store float* [[VAL]], float** [[DECL]], + // CK1: store float* [[VAL]], float** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load float*, float** [[PVT]], + // CK1: getelementptr inbounds float, float* [[TT1]], i32 1 + #pragma omp target data map(l[:10]) use_device_ptr(l) if(1) + { + ++l; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE03]] + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + ++l; + + // CK1: [[CMP:%.+]] = icmp ne float* %{{.+}}, null + // CK1: br i1 [[CMP]], label %[[BTHEN:.+]], label %[[BELSE:.+]] + + // CK1: [[BTHEN]]: + // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE04]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** + // CK1: [[VAL:%.+]] = load float*, float** [[CBP]], + // CK1-NOT: store float* [[VAL]], float** [[DECL]], + // CK1: store float* [[VAL]], float** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load float*, float** [[PVT]], + // CK1: getelementptr inbounds float, float* [[TT1]], i32 1 + // CK1: br label %[[BEND:.+]] + + // CK1: [[BELSE]]: + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + // CK1: br label %[[BEND]] + #pragma omp target data map(l[:10]) use_device_ptr(l) if(lr != 0) + { + ++l; + } + // CK1: [[BEND]]: + // CK1: [[CMP:%.+]] = icmp ne float* %{{.+}}, null + // CK1: br i1 [[CMP]], label %[[BTHEN:.+]], label %[[BELSE:.+]] + + // CK1: [[BTHEN]]: + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE04]] + // CK1: br label %[[BEND:.+]] + + // CK1: [[BELSE]]: + // CK1: br label %[[BEND]] + + // CK1: [[BEND]]: + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + ++l; + + // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load float*, float** [[T2:%.+]], + // CK1-DAG: [[T2]] = load float**, float*** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE05]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** + // CK1: [[VAL:%.+]] = load float*, float** [[CBP]], + // CK1: store float* [[VAL]], float** [[PVTV:%.+]], + // CK1-NOT: store float** [[PVTV]], float*** [[DECL]], + // CK1: store float** [[PVTV]], float*** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load float**, float*** [[PVT]], + // CK1: [[TT2:%.+]] = load float*, float** [[TT1]], + // CK1: getelementptr inbounds float, float* [[TT2]], i32 1 + #pragma omp target data map(lr[:10]) use_device_ptr(lr) + { + ++lr; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE05]] + // CK1: [[TTT:%.+]] = load float**, float*** [[DECL]], + // CK1: [[TTTT:%.+]] = load float*, float** [[TTT]], + // CK1: getelementptr inbounds float, float* [[TTTT]], i32 1 + ++lr; + + // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load i32*, i32** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE06]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** + // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]], + // CK1-NOT: store i32* [[VAL]], i32** [[DECL]], + // CK1: store i32* [[VAL]], i32** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]], + // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1 + #pragma omp target data map(t[:10]) use_device_ptr(t) + { + ++t; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE06]] + // CK1: [[TTT:%.+]] = load i32*, i32** [[DECL]], + // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1 + ++t; + + // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load i32*, i32** [[T2:%.+]], + // CK1-DAG: [[T2]] = load i32**, i32*** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE07]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** + // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]], + // CK1: store i32* [[VAL]], i32** [[PVTV:%.+]], + // CK1-NOT: store i32** [[PVTV]], i32*** [[DECL]], + // CK1: store i32** [[PVTV]], i32*** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load i32**, i32*** [[PVT]], + // CK1: [[TT2:%.+]] = load i32*, i32** [[TT1]], + // CK1: getelementptr inbounds i32, i32* [[TT2]], i32 1 + #pragma omp target data map(tr[:10]) use_device_ptr(tr) + { + ++tr; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE07]] + // CK1: [[TTT:%.+]] = load i32**, i32*** [[DECL]], + // CK1: [[TTTT:%.+]] = load i32*, i32** [[TTT]], + // CK1: getelementptr inbounds i32, i32* [[TTTT]], i32 1 + ++tr; + + // CK1-DAG: [[RVAL:%.+]] = bitcast float* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load float*, float** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE08]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** + // CK1: [[VAL:%.+]] = load float*, float** [[CBP]], + // CK1-NOT: store float* [[VAL]], float** [[DECL]], + // CK1: store float* [[VAL]], float** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load float*, float** [[PVT]], + // CK1: getelementptr inbounds float, float* [[TT1]], i32 1 + #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) + { + ++l; ++t; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE08]] + // CK1: [[TTT:%.+]] = load float*, float** [[DECL]], + // CK1: getelementptr inbounds float, float* [[TTT]], i32 1 + ++l; ++t; + + + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE09]] + // CK1: [[_CBP:%.+]] = bitcast i8** {{%.+}} to float** + // CK1: [[_VAL:%.+]] = load float*, float** [[_CBP]], + // CK1: store float* [[_VAL]], float** [[_PVT:%.+]], + // CK1: [[CBP:%.+]] = bitcast i8** {{%.+}} to i32** + // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]], + // CK1: store i32* [[VAL]], i32** [[PVT:%.+]], + // CK1: [[_TT1:%.+]] = load float*, float** [[_PVT]], + // CK1: getelementptr inbounds float, float* [[_TT1]], i32 1 + // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]], + // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1 + #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) use_device_ptr(t) + { + ++l; ++t; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE09]] + // CK1: [[_TTT:%.+]] = load float*, float** {{%.+}}, + // CK1: getelementptr inbounds float, float* [[_TTT]], i32 1 + // CK1: [[TTT:%.+]] = load i32*, i32** {{%.+}}, + // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1 + ++l; ++t; + + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE10]] + // CK1: [[_CBP:%.+]] = bitcast i8** {{%.+}} to float** + // CK1: [[_VAL:%.+]] = load float*, float** [[_CBP]], + // CK1: store float* [[_VAL]], float** [[_PVT:%.+]], + // CK1: [[CBP:%.+]] = bitcast i8** {{%.+}} to i32** + // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]], + // CK1: store i32* [[VAL]], i32** [[PVT:%.+]], + // CK1: [[_TT1:%.+]] = load float*, float** [[_PVT]], + // CK1: getelementptr inbounds float, float* [[_TT1]], i32 1 + // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]], + // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1 + #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l,t) + { + ++l; ++t; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE10]] + // CK1: [[_TTT:%.+]] = load float*, float** {{%.+}}, + // CK1: getelementptr inbounds float, float* [[_TTT]], i32 1 + // CK1: [[TTT:%.+]] = load i32*, i32** {{%.+}}, + // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1 + ++l; ++t; + + // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load i32*, i32** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE11]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** + // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]], + // CK1-NOT: store i32* [[VAL]], i32** [[DECL]], + // CK1: store i32* [[VAL]], i32** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load i32*, i32** [[PVT]], + // CK1: getelementptr inbounds i32, i32* [[TT1]], i32 1 + #pragma omp target data map(l[:10]) use_device_ptr(t) + { + ++l; ++t; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE11]] + // CK1: [[TTT:%.+]] = load i32*, i32** [[DECL]], + // CK1: getelementptr inbounds i32, i32* [[TTT]], i32 1 + ++l; ++t; + + // CK1-DAG: [[RVAL:%.+]] = bitcast i32* [[T1:%.+]] to i8* + // CK1-DAG: [[T1]] = load i32*, i32** [[T2:%.+]], + // CK1-DAG: [[T2]] = load i32**, i32*** [[DECL:%.+]], + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: store i8* [[RVAL]], i8** [[BP]], + // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE12]] + // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** + // CK1: [[VAL:%.+]] = load i32*, i32** [[CBP]], + // CK1: store i32* [[VAL]], i32** [[PVTV:%.+]], + // CK1-NOT: store i32** [[PVTV]], i32*** [[DECL]], + // CK1: store i32** [[PVTV]], i32*** [[PVT:%.+]], + // CK1: [[TT1:%.+]] = load i32**, i32*** [[PVT]], + // CK1: [[TT2:%.+]] = load i32*, i32** [[TT1]], + // CK1: getelementptr inbounds i32, i32* [[TT2]], i32 1 + #pragma omp target data map(l[:10]) use_device_ptr(tr) + { + ++l; ++tr; + } + // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE12]] + // CK1: [[TTT:%.+]] = load i32**, i32*** [[DECL]], + // CK1: [[TTTT:%.+]] = load i32*, i32** [[TTT]], + // CK1: getelementptr inbounds i32, i32* [[TTTT]], i32 1 + ++l; ++tr; + +} + +void bar(float *&a, int *&b) { + foo(a,b); +} + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2: [[ST:%.+]] = type { double*, double** } +// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i32] [i32 35, i32 531] +// CK2: [[MTYPE01:@.+]] = {{.*}}constant [2 x i32] [i32 35, i32 531] +// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i32] [i32 544, i32 35] +// CK2: [[MTYPE03:@.+]] = {{.*}}constant [3 x i32] [i32 544, i32 35, i32 531] + +template +struct ST { + T *a; + double *&b; + ST(double *&b) : a(0), b(b) {} + + // CK2-LABEL: @{{.*}}foo{{.*}} + void foo(double *&arg) { + int *la = 0; + + // CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 + // CK2: store i8* [[RVAL:%.+]], i8** [[BP]], + // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]] + // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double** + // CK2: [[VAL:%.+]] = load double*, double** [[CBP]], + // CK2: store double* [[VAL]], double** [[PVT:%.+]], + // CK2: store double** [[PVT]], double*** [[PVT2:%.+]], + // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]], + // CK2: [[TT2:%.+]] = load double*, double** [[TT1]], + // CK2: getelementptr inbounds double, double* [[TT2]], i32 1 + #pragma omp target data map(a[:10]) use_device_ptr(a) + { + a++; + } + // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE00]] + // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0 + // CK2: [[TTT:%.+]] = load double*, double** [[DECL]], + // CK2: getelementptr inbounds double, double* [[TTT]], i32 1 + a++; + + // CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 + // CK2: store i8* [[RVAL:%.+]], i8** [[BP]], + // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]] + // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double** + // CK2: [[VAL:%.+]] = load double*, double** [[CBP]], + // CK2: store double* [[VAL]], double** [[PVT:%.+]], + // CK2: store double** [[PVT]], double*** [[PVT2:%.+]], + // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]], + // CK2: [[TT2:%.+]] = load double*, double** [[TT1]], + // CK2: getelementptr inbounds double, double* [[TT2]], i32 1 + #pragma omp target data map(b[:10]) use_device_ptr(b) + { + b++; + } + // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE01]] + // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %{{.+}}, i32 0, i32 1 + // CK2: [[TTT:%.+]] = load double**, double*** [[DECL]], + // CK2: [[TTTT:%.+]] = load double*, double** [[TTT]], + // CK2: getelementptr inbounds double, double* [[TTTT]], i32 1 + b++; + + // CK2: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK2: store i8* [[RVAL:%.+]], i8** [[BP]], + // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE02]] + // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double** + // CK2: [[VAL:%.+]] = load double*, double** [[CBP]], + // CK2: store double* [[VAL]], double** [[PVT:%.+]], + // CK2: store double** [[PVT]], double*** [[PVT2:%.+]], + // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]], + // CK2: [[TT2:%.+]] = load double*, double** [[TT1]], + // CK2: getelementptr inbounds double, double* [[TT2]], i32 1 + #pragma omp target data map(la[:10]) use_device_ptr(a) + { + a++; + la++; + } + // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE02]] + // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0 + // CK2: [[TTT:%.+]] = load double*, double** [[DECL]], + // CK2: getelementptr inbounds double, double* [[TTT]], i32 1 + a++; + la++; + + // CK2: [[BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 0 + // CK2: store i8* [[RVAL:%.+]], i8** [[BP]], + // CK2: [[_BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 2 + // CK2: store i8* [[_RVAL:%.+]], i8** [[_BP]], + // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]] + // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double** + // CK2: [[VAL:%.+]] = load double*, double** [[CBP]], + // CK2: store double* [[VAL]], double** [[PVT:%.+]], + // CK2: store double** [[PVT]], double*** [[PVT2:%.+]], + // CK2: [[_CBP:%.+]] = bitcast i8** [[_BP]] to double** + // CK2: [[_VAL:%.+]] = load double*, double** [[_CBP]], + // CK2: store double* [[_VAL]], double** [[_PVT:%.+]], + // CK2: store double** [[_PVT]], double*** [[_PVT2:%.+]], + // CK2: [[TT1:%.+]] = load double**, double*** [[PVT2]], + // CK2: [[TT2:%.+]] = load double*, double** [[TT1]], + // CK2: getelementptr inbounds double, double* [[TT2]], i32 1 + // CK2: [[_TT1:%.+]] = load double**, double*** [[_PVT2]], + // CK2: [[_TT2:%.+]] = load double*, double** [[_TT1]], + // CK2: getelementptr inbounds double, double* [[_TT2]], i32 1 + #pragma omp target data map(b[:10]) use_device_ptr(a, b) + { + a++; + b++; + } + // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE03]] + // CK2: [[DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 0 + // CK2: [[TTT:%.+]] = load double*, double** [[DECL]], + // CK2: getelementptr inbounds double, double* [[TTT]], i32 1 + // CK2: [[_DECL:%.+]] = getelementptr inbounds [[ST]], [[ST]]* %this1, i32 0, i32 1 + // CK2: [[_TTT:%.+]] = load double**, double*** [[_DECL]], + // CK2: [[_TTTT:%.+]] = load double*, double** [[_TTT]], + // CK2: getelementptr inbounds double, double* [[_TTTT]], i32 1 + a++; + b++; + } +}; + +void bar(double *arg){ + ST A(arg); + A.foo(arg); + ++arg; +} +#endif +#endif