diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -5355,14 +5355,14 @@ if (!(--RemainingLists)) { ++DeclCur; ++NumListsCur; - if (SupportsMapper) - ++MapperCur; RemainingLists = *NumListsCur; assert(RemainingLists && "No lists in the following declaration??"); } } ++ListSizeCur; + if (SupportsMapper) + ++MapperCur; return *this; } }; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7151,11 +7151,13 @@ /// [ValueDecl *] --> {LE(FieldIndex, Pointer), /// HE(FieldIndex, Pointer)} struct StructRangeInfoTy { + MapCombinedInfoTy PreliminaryMapData; std::pair LowestElem = { 0, Address::invalid()}; std::pair HighestElem = { 0, Address::invalid()}; Address Base = Address::invalid(); + Address LB = Address::invalid(); bool IsArraySection = false; bool HasCompleteRecord = false; }; @@ -7754,11 +7756,9 @@ (IsPointer || ForDeviceAddr) && EncounteredME && (dyn_cast(I->getAssociatedExpression()) == EncounteredME); - if (!OverlappedElements.empty()) { + if (!OverlappedElements.empty() && Next == CE) { // Handle base element with the info for overlapped elements. assert(!PartialStruct.Base.isValid() && "The base element is set."); - assert(Next == CE && - "Expected last element for the overlapped elements."); assert(!IsPointer && "Unexpected base element with the pointer type."); // Mark the whole struct as the struct that requires allocation on the @@ -7775,13 +7775,17 @@ PartialStruct.HighestElem.first)>::max(), HB}; PartialStruct.Base = BP; + PartialStruct.LB = LB; + assert( + PartialStruct.PreliminaryMapData.BasePointers.empty() && + "Overlapped elements must be used only once for the variable."); + std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); // Emit data for non-overlapped data. OpenMPOffloadMappingFlags Flags = OMP_MAP_MEMBER_OF | getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, IsNonContiguous); - LB = BP; llvm::Value *Size = nullptr; // Do bitcopy of all non-overlapped structure elements. for (OMPClauseMappableExprCommon::MappableExprComponentListRef @@ -7890,6 +7894,7 @@ PartialStruct.HighestElem = {FieldIndex, LB}; } PartialStruct.Base = BP; + PartialStruct.LB = BP; } else if (FieldIndex < PartialStruct.LowestElem.first) { PartialStruct.LowestElem = {FieldIndex, LB}; } else if (FieldIndex > PartialStruct.HighestElem.first) { @@ -8609,8 +8614,8 @@ Address LBAddr = PartialStruct.LowestElem.second; Address HBAddr = PartialStruct.HighestElem.second; if (PartialStruct.HasCompleteRecord) { - LBAddr = PartialStruct.Base; - HBAddr = PartialStruct.Base; + LBAddr = PartialStruct.LB; + HBAddr = PartialStruct.LB; } CombinedInfo.Exprs.push_back(VD); // Base is the base of the struct @@ -8909,11 +8914,17 @@ // Sort the overlapped elements for each item. llvm::SmallVector Layout; if (!OverlappedData.empty()) { - if (const auto *CRD = - VD->getType().getCanonicalType()->getAsCXXRecordDecl()) + const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); + const Type *OrigType = BaseType->getPointeeOrArrayElementType(); + while (BaseType != OrigType) { + BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); + OrigType = BaseType->getPointeeOrArrayElementType(); + } + + if (const auto *CRD = BaseType->getAsCXXRecordDecl()) getPlainLayout(CRD, Layout, /*AsBase=*/false); else { - const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); + const auto *RD = BaseType->getAsRecordDecl(); Layout.append(RD->field_begin(), RD->field_end()); } } @@ -9567,10 +9578,12 @@ /// void *base, void *begin, /// int64_t size, int64_t type, /// void *name = nullptr) { -/// // Allocate space for an array section first. -/// if ((size > 1 || base != begin) && !maptype.IsDelete) +/// // Allocate space for an array section first or add a base/begin for +/// // pointer dereference. +/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && +/// !maptype.IsDelete) /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, -/// size*sizeof(Ty), clearToFrom(type)); +/// size*sizeof(Ty), clearToFromMember(type)); /// // Map members. /// for (unsigned i = 0; i < size; i++) { /// // For each component specified by this mapper: @@ -9585,9 +9598,9 @@ /// } /// } /// // Delete the array section. -/// if ((size > 1 || base != begin) && maptype.IsDelete) +/// if (size > 1 && maptype.IsDelete) /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, -/// size*sizeof(Ty), clearToFrom(type)); +/// size*sizeof(Ty), clearToFromMember(type)); /// } /// \endcode void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, @@ -9851,18 +9864,26 @@ MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); - // base != begin? - llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( - MapperCGF.Builder.CreatePtrDiff(Base, Begin)); - llvm::Value *Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( MapType, MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); llvm::Value *DeleteCond; + llvm::Value *Cond; if (IsInit) { + // base != begin? + llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( + MapperCGF.Builder.CreatePtrDiff(Base, Begin)); + // IsPtrAndObj? + llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); + PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); + BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); + Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); DeleteCond = MapperCGF.Builder.CreateIsNull( DeleteBit, getName({"omp.array", Prefix, ".delete"})); } else { + Cond = IsArray; DeleteCond = MapperCGF.Builder.CreateIsNotNull( DeleteBit, getName({"omp.array", Prefix, ".delete"})); } @@ -9879,7 +9900,8 @@ llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( MapType, MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM))); + MappableExprsHandler::OMP_MAP_FROM | + MappableExprsHandler::OMP_MAP_MEMBER_OF))); llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); // Call the runtime API __tgt_push_mapper_component to fill up the runtime @@ -10171,9 +10193,12 @@ // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) - MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, - nullptr, /*NoTargetParam=*/false); + if (PartialStruct.Base.isValid()) { + CombinedInfo.append(PartialStruct.PreliminaryMapData); + MEHandler.emitCombinedEntry( + CombinedInfo, CurInfo.Types, PartialStruct, nullptr, + !PartialStruct.PreliminaryMapData.BasePointers.empty()); + } // We need to append the results of this capture to what we already have. CombinedInfo.append(CurInfo); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5151,6 +5151,146 @@ } } +static ExprResult buildUserDefinedMapperRef(Sema &SemaRef, Scope *S, + CXXScopeSpec &MapperIdScopeSpec, + const DeclarationNameInfo &MapperId, + QualType Type, + Expr *UnresolvedMapper); + +/// Perform DFS through the structure/class data members trying to find +/// member(s) with user-defined 'default' mapper and generate implicit map +/// clauses for such members with the found 'default' mapper. +static void +processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, + SmallVectorImpl &Clauses) { + // Check for the deault mapper for data members. + if (S.getLangOpts().OpenMP < 50) + return; + SmallVector ImplicitMaps; + DeclarationNameInfo DefaultMapperId; + DefaultMapperId.setName(S.Context.DeclarationNames.getIdentifier( + &S.Context.Idents.get("default"))); + for (int Cnt = 0, EndCnt = Clauses.size(); Cnt < EndCnt; ++Cnt) { + auto *C = dyn_cast(Clauses[Cnt]); + if (!C) + continue; + SmallVector SubExprs; + auto *MI = C->mapperlist_begin(); + for (auto I = C->varlist_begin(), End = C->varlist_end(); I != End; + ++I, ++MI) { + // Expression is mapped using mapper - skip it. + if (*MI) + continue; + Expr *E = *I; + // Expression is dependent - skip it, build the mapper when it gets + // instantiated. + if (E->isTypeDependent() || E->isValueDependent() || + E->containsUnexpandedParameterPack()) + continue; + // Array section - need to check for the mapping of the array section + // element. + QualType CanonType = E->getType().getCanonicalType(); + if (CanonType->isSpecificBuiltinType(BuiltinType::OMPArraySection)) { + const auto *OASE = cast(E->IgnoreParenImpCasts()); + QualType BaseType = + OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + QualType ElemType; + if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) + ElemType = ATy->getElementType(); + else + ElemType = BaseType->getPointeeType(); + CanonType = ElemType; + } + + // DFS over data members in structures/classes. + SmallVector, 4> Types( + 1, {CanonType, nullptr}); + llvm::DenseMap Visited; + SmallVector, 4> ParentChain( + 1, {nullptr, 1}); + while (!Types.empty()) { + QualType BaseType; + FieldDecl *CurFD; + std::tie(BaseType, CurFD) = Types.pop_back_val(); + while (ParentChain.back().second == 0) + ParentChain.pop_back(); + --ParentChain.back().second; + if (BaseType.isNull()) + continue; + // Only structs/classes are allowed to have mappers. + const RecordDecl *RD = BaseType.getCanonicalType()->getAsRecordDecl(); + if (!RD) + continue; + auto It = Visited.find(BaseType.getTypePtr()); + if (It == Visited.end()) { + // Try to find the associated user-defined mapper. + CXXScopeSpec MapperIdScopeSpec; + ExprResult ER = buildUserDefinedMapperRef( + S, Stack->getCurScope(), MapperIdScopeSpec, DefaultMapperId, + BaseType, /*UnresolvedMapper=*/nullptr); + if (ER.isInvalid()) + continue; + It = Visited.try_emplace(BaseType.getTypePtr(), ER.get()).first; + } + // Found default mapper. + if (It->second) { + auto *OE = new (S.Context) OpaqueValueExpr(E->getExprLoc(), CanonType, + VK_LValue, OK_Ordinary, E); + OE->setIsUnique(/*V=*/true); + Expr *BaseExpr = OE; + for (const auto &P : ParentChain) { + if (P.first) { + BaseExpr = S.BuildMemberExpr( + BaseExpr, /*IsArrow=*/false, E->getExprLoc(), + NestedNameSpecifierLoc(), SourceLocation(), P.first, + DeclAccessPair::make(P.first, P.first->getAccess()), + /*HadMultipleCandidates=*/false, DeclarationNameInfo(), + P.first->getType(), VK_LValue, OK_Ordinary); + BaseExpr = S.DefaultLvalueConversion(BaseExpr).get(); + } + } + if (CurFD) + BaseExpr = S.BuildMemberExpr( + BaseExpr, /*IsArrow=*/false, E->getExprLoc(), + NestedNameSpecifierLoc(), SourceLocation(), CurFD, + DeclAccessPair::make(CurFD, CurFD->getAccess()), + /*HadMultipleCandidates=*/false, DeclarationNameInfo(), + CurFD->getType(), VK_LValue, OK_Ordinary); + SubExprs.push_back(BaseExpr); + continue; + } + // Check for the "default" mapper for data memebers. + bool FirstIter = true; + for (FieldDecl *FD : RD->fields()) { + if (!FD) + continue; + QualType FieldTy = FD->getType(); + if (FieldTy.isNull() || + !(FieldTy->isStructureOrClassType() || FieldTy->isUnionType())) + continue; + if (FirstIter) { + FirstIter = false; + ParentChain.emplace_back(CurFD, 1); + } else { + ++ParentChain.back().second; + } + Types.emplace_back(FieldTy, FD); + } + } + } + if (SubExprs.empty()) + continue; + CXXScopeSpec MapperIdScopeSpec; + DeclarationNameInfo MapperId; + if (OMPClause *NewClause = S.ActOnOpenMPMapClause( + C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), + MapperIdScopeSpec, MapperId, C->getMapType(), + /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), + SubExprs, OMPVarListLocTy())) + Clauses.push_back(NewClause); + } +} + StmtResult Sema::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, @@ -5271,6 +5411,11 @@ } } } + // Build expressions for implicit maps of data members with 'default' + // mappers. + if (LangOpts.OpenMP >= 50) + processImplicitMapsWithDefaultMappers(*this, DSAStack, + ClausesWithImplicit); } llvm::SmallVector AllowedNameModifiers; @@ -17502,6 +17647,14 @@ Components.emplace_back(COCE, nullptr, IsNonContiguous); return true; } + bool VisitOpaqueValueExpr(OpaqueValueExpr *E) { + Expr *Source = E->getSourceExpr(); + if (!Source) { + emitErrorMsg(); + return false; + } + return Visit(Source); + } bool VisitStmt(Stmt *) { emitErrorMsg(); return false; @@ -17913,19 +18066,25 @@ } namespace { -// Utility struct that gathers all the related lists associated with a mappable -// expression. +/// Utility struct that gathers all the related lists associated with a mappable +/// expression. struct MappableVarListInfo { - // The list of expressions. + /// The list of expressions. ArrayRef VarList; - // The list of processed expressions. + /// The list of processed expressions. SmallVector ProcessedVarList; - // The mappble components for each expression. + /// The mappble components for each expression. OMPClauseMappableExprCommon::MappableExprComponentLists VarComponents; - // The base declaration of the variable. + /// The base declaration of the variable. SmallVector VarBaseDeclarations; - // The reference to the user-defined mapper associated with every expression. + /// The reference to the user-defined mapper associated with every expression. SmallVector UDMapperList; + /// List of 'default' wrappers for the subcomponents of the list expressions. + SmallVector, 4> DefaultMappersForSubComponents; + /// List of subcomponents for mapping with the 'default' mapper. Represented + /// as OpaqueValueExpr->field1->field2->...->fieldn. Need to use + /// OpaqueValueExpr since the base expression may be an array section. + SmallVector, 4> SubComponents; MappableVarListInfo(ArrayRef VarList) : VarList(VarList) { // We have a list of components and base declarations for each entry in the @@ -18622,8 +18781,15 @@ Diag(I->second, diag::note_previous_definition); Invalid = true; } - auto *DMD = OMPDeclareMapperDecl::Create(Context, DC, StartLoc, Name, - MapperType, VN, Clauses, PrevDMD); + // Build expressions for implicit maps of data members with 'default' + // mappers. + SmallVector ClausesWithImplicit(Clauses.begin(), + Clauses.end()); + if (LangOpts.OpenMP >= 50) + processImplicitMapsWithDefaultMappers(*this, DSAStack, ClausesWithImplicit); + auto *DMD = + OMPDeclareMapperDecl::Create(Context, DC, StartLoc, Name, MapperType, VN, + ClausesWithImplicit, PrevDMD); if (S) PushOnScopeChains(DMD, S); else diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp --- a/clang/test/OpenMP/declare_mapper_codegen.cpp +++ b/clang/test/OpenMP/declare_mapper_codegen.cpp @@ -107,7 +107,10 @@ // CK0-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] // CK0-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CK0-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK0-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] +// CK0-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16 +// CK0-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0 +// CK0-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]] +// CK0-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[CMPA]] // CK0-DAG: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK0-DAG: [[ISNOTDEL:%.+]] = icmp eq i64 [[TYPEDEL]], 0 // CK0-DAG: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] @@ -115,7 +118,7 @@ // CK0: [[INIT]] // CK0-64-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16 // CK0-32-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 8 -// CK0-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], -4 +// CK0-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK0: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[ITYPE]], {{.*}}) // CK0: br label %[[LHEAD:[^,]+]] @@ -218,20 +221,14 @@ // CK0: [[LEXIT]] // CK0: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1 -// CK0: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64 -// CK0: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64 -// CK0: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] -// CK0: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CK0: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK0: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] // CK0: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK0: [[ISNOTDEL:%.+]] = icmp ne i64 [[TYPEDEL]], 0 -// CK0: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] +// CK0: [[CMP1:%.+]] = and i1 [[ISARRAY]], [[ISNOTDEL]] // CK0: br i1 [[CMP1]], label %[[EVALDEL:[^,]+]], label %[[DONE]] // CK0: [[EVALDEL]] // CK0-64-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16 // CK0-32-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 8 -// CK0-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], -4 +// CK0-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK0: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[DTYPE]], {{.*}}) // CK0: br label %[[DONE]] // CK0: [[DONE]] @@ -659,7 +656,10 @@ // CK1-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] // CK1-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CK1-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK1-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] +// CK1-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16 +// CK1-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0 +// CK1-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]] +// CK1-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[CMPA]] // CK1-DAG: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK1-DAG: [[ISNOTDEL:%.+]] = icmp eq i64 [[TYPEDEL]], 0 // CK1-DAG: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] @@ -667,7 +667,7 @@ // CK1: [[INITEVALDEL]] // CK1-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 4 -// CK1-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], -4 +// CK1-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK1: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[ITYPE]], {{.*}}) // CK1: br label %[[LHEAD:[^,]+]] @@ -709,17 +709,11 @@ // CK1: [[LEXIT]] // CK1: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1 -// CK1: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64 -// CK1: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64 -// CK1: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] -// CK1: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CK1: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK1: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] // CK1: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK1: [[ISNOTDEL:%.+]] = icmp ne i64 [[TYPEDEL]], 0 -// CK1: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] +// CK1: [[CMP1:%.+]] = and i1 [[ISARRAY]], [[ISNOTDEL]] // CK1-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 4 -// CK1-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], -4 +// CK1-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK1: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[DTYPE]], {{.*}}) // CK1: br label %[[DONE]] // CK1: [[DONE]] @@ -783,7 +777,10 @@ // CK2-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] // CK2-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CK2-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK2-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] +// CK2-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16 +// CK2-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0 +// CK2-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]] +// CK2-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[CMPA]] // CK2-DAG: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK2-DAG: [[ISNOTDEL:%.+]] = icmp eq i64 [[TYPEDEL]], 0 // CK2-DAG: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] @@ -791,7 +788,7 @@ // CK2: [[INITEVALDEL]] // CK2-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16 -// CK2-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], -4 +// CK2-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK2: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[ITYPE]], {{.*}}) // CK2: br label %[[LHEAD:[^,]+]] @@ -833,19 +830,13 @@ // CK2: [[LEXIT]] // CK2: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1 -// CK2: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64 -// CK2: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64 -// CK2: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] -// CK2: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CK2: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK2: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] // CK2: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK2: [[ISNOTDEL:%.+]] = icmp ne i64 [[TYPEDEL]], 0 -// CK2: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] +// CK2: [[CMP1:%.+]] = and i1 [[ISARRAY]], [[ISNOTDEL]] // CK2: br i1 [[CMP1]], label %[[EVALDEL:[^,]+]], label %[[DONE]] // CK2: [[EVALDEL]] // CK2-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16 -// CK2-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], -4 +// CK2-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK2: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[DTYPE]], {{.*}}) // CK2: br label %[[DONE]] // CK2: [[DONE]] @@ -990,7 +981,10 @@ // CK4-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] // CK4-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CK4-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK4-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] +// CK4-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16 +// CK4-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0 +// CK4-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]] +// CK4-DAG: [[CMP:%.+]] = or i1 [[ISARRAY]], [[CMPA]] // CK4-DAG: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK4-DAG: [[ISNOTDEL:%.+]] = icmp eq i64 [[TYPEDEL]], 0 // CK4-DAG: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] @@ -999,7 +993,7 @@ // CK4: [[INITEVALDEL]] // CK4-64-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16 // CK4-32-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 8 -// CK4-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], -4 +// CK4-DAG: [[ITYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK4: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[ITYPE]], {{.*}}) // CK4: br label %[[LHEAD:[^,]+]] @@ -1102,20 +1096,14 @@ // CK4: [[LEXIT]] // CK4: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1 -// CK4: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64 -// CK4: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64 -// CK4: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]] -// CK4: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CK4: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0 -// CK4: [[CMP:%.+]] = or i1 [[ISARRAY]], [[PTRSNE]] // CK4: [[TYPEDEL:%.+]] = and i64 [[TYPE]], 8 // CK4: [[ISNOTDEL:%.+]] = icmp ne i64 [[TYPEDEL]], 0 -// CK4: [[CMP1:%.+]] = and i1 [[CMP]], [[ISNOTDEL]] +// CK4: [[CMP1:%.+]] = and i1 [[ISARRAY]], [[ISNOTDEL]] // CK4: br i1 [[CMP1]], label %[[EVALDEL:[^,]+]], label %[[DONE]] // CK4: [[EVALDEL]] // CK4-64-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 16 // CK4-32-DAG: [[ARRSIZE:%.+]] = mul nuw i64 [[SIZE]], 8 -// CK4-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], -4 +// CK4-DAG: [[DTYPE:%.+]] = and i64 [[TYPE]], 281474976710652 // CK4: call void @__tgt_push_mapper_component(i8* [[HANDLE]], i8* [[BPTR]], i8* [[BEGIN]], i64 [[ARRSIZE]], i64 [[DTYPE]], {{.*}}) // CK4: br label %[[DONE]] // CK4: [[DONE]] diff --git a/clang/test/OpenMP/target_map_codegen_34.cpp b/clang/test/OpenMP/target_map_codegen_34.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_map_codegen_34.cpp @@ -0,0 +1,258 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +///==========================================================================/// +// RUN: %clang_cc1 -DCK34 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK34 --check-prefix CK34-64 +// RUN: %clang_cc1 -DCK34 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK34 --check-prefix CK34-64 +// RUN: %clang_cc1 -DCK34 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK34 --check-prefix CK34-32 +// RUN: %clang_cc1 -DCK34 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK34 --check-prefix CK34-32 + +// RUN: %clang_cc1 -DCK34 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// RUN: %clang_cc1 -DCK34 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// RUN: %clang_cc1 -DCK34 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// RUN: %clang_cc1 -DCK34 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// SIMD-ONLY32-NOT: {{__kmpc|__tgt}} +#ifdef CK34 + +class C { +public: + int a; + double *b; +}; + +#pragma omp declare mapper(C s) map(s.a, s.b[0:2]) + +class S { + int a; + C c; + int b; +public: + void foo(); +}; + +// TARGET_PARAM = 0x20 +// MEMBER_OF_1 | TO = 0x1000000000001 +// MEMBER_OF_1 | IMPLICIT | TO = 0x1000000000201 +// CK34-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000201]]] +// TARGET_PARAM = 0x20 +// MEMBER_OF_1 | FROM = 0x1000000000002 +// MEMBER_OF_1 | IMPLICIT | FROM = 0x1000000000202 +// CK34-DAG: [[MTYPE_FROM:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000002]], i64 [[#0x1000000000002]], i64 [[#0x1000000000202]]] + +void default_mapper() { + S s; + + // CK34-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE_TO]]{{.+}}, i8** null, i8** [[GEPMF:%.+]]) + // CK34-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK34-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK34-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + // CK34-DAG: [[GEPMF]] = bitcast [4 x i8*]* [[MF:%.+]] to i8** + + // pass TARGET_PARAM {&s, &s, ((void*)(&s+1)-(void*)&s)} + + // CK34-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK34-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK34-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK34-DAG: [[MF0:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 0 + + // CK34-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to %class.S** + // CK34-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to %class.S** + + // CK34-DAG: store %class.S* [[S_ADDR:%.+]], %class.S** [[BPC0]], + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[PC0]], + // CK34-DAG: store i64 [[S_SIZE:%.+]], i64* [[S0]], + // CK34-DAG: store i8* null, i8** [[MF0]], + + // CK34-DAG: [[S_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) + // CK34-DAG: [[SZ]] = sub i64 [[S_1_INTPTR:%.+]], [[S_INTPTR:%.+]] + // CK34-DAG: [[S_1_INTPTR]] = ptrtoint i8* [[S_1_VOID:%.+]] to i64 + // CK34-DAG: [[S_INTPTR]] = ptrtoint i8* [[S_VOID:%.+]] to i64 + // CK34-DAG: [[S_1_VOID]] = bitcast %class.S* [[S_1:%.+]] to i8* + // CK34-DAG: [[S_VOID]] = bitcast %class.S* [[S_ADDR]] to i8* + // CK34-DAG: [[S_1]] = getelementptr %class.S, %class.S* [[S_ADDR]], i32 1 + + // pass MEMBER_OF_1 | TO {&s, &s, ((void*)(&s.a+1)-(void*)&s)} to copy the data of s.a. + + // CK34-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK34-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK34-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK34-DAG: [[MF1:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 1 + + // CK34-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to %class.S** + // CK34-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to %class.S** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC1]], + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[PC1]], + // CK34-DAG: store i64 [[A_SIZE:%.+]], i64* [[S1]], + // CK34-DAG: store i8* null, i8** [[MF1]], + + // CK34-DAG: [[A_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) + // CK34-DAG: [[SZ]] = sub i64 [[C_BEGIN_INTPTR:%.+]], [[S_INTPTR:%.+]] + // CK34-DAG: [[S_INTPTR]] = ptrtoint i8* [[S_VOID:%.+]] to i64 + // CK34-DAG: [[C_BEGIN_INTPTR]] = ptrtoint i8* [[C_BEGIN_VOID:%.+]] to i64 + // CK34-DAG: [[S_VOID]] = bitcast %class.S* [[S_ADDR]] to i8* + // CK34-DAG: [[C_BEGIN_VOID]] = bitcast %class.C* [[C_ADDR:%.+]] to i8* + // CK34-64-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 2 + // CK34-32-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 1 + + // pass MEMBER_OF_1 | TO {&s, &s.c+1, ((void*)(&s)+31+1-(void*)(&s.c+1))} to copy the data of s.b. + + // CK34-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK34-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK34-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK34-DAG: [[MF2:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 2 + + // CK34-DAG: [[BPC2:%.+]] = bitcast i8** [[BP2]] to %class.S** + // CK34-DAG: [[PC2:%.+]] = bitcast i8** [[P2]] to %class.C** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC2]], + // CK34-DAG: store %class.C* [[C_END:%.+]], %class.C** [[PC2]], + // CK34-DAG: store i64 [[B_SIZE:%.+]], i64* [[S2]], + // CK34-DAG: store i8* null, i8** [[MF2]], + + // CK34-DAG: [[C_END]] = getelementptr %class.C, %class.C* [[C_ADDR]], i{{.+}} 1 + + // CK34-DAG: [[B_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) + // CK34-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[C_END_INTPTR:%.+]] + // CK34-DAG: [[C_END_INTPTR]] = ptrtoint i8* [[C_END_VOID:%.+]] to i64 + // CK34-DAG: [[S_END_INTPTR]] = ptrtoint i8* [[S_END_VOID:%.+]] to i64 + // CK34-DAG: [[C_END_VOID]] = bitcast %class.C* [[C_END]] to i8* + // CK34-DAG: [[S_END_VOID]] = getelementptr i8, i8* [[S_LAST:%.+]], i{{.+}} 1 + // CK34-64-DAG: [[S_LAST]] = getelementptr i8, i8* [[S_VOID:%.+]], i64 31 + // CK34-32-DAG: [[S_LAST]] = getelementptr i8, i8* [[S_VOID:%.+]], i32 15 + // CK34-DAG: [[S_VOID]] = bitcast %class.S* [[S_ADDR]] to i8* + + // pass MEMBER_OF_1 | TO | IMPLICIT | MAPPER {&s, &s.c, 16} to copy the data of s.c. + + // CK34-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 + // CK34-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 + // CK34-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3 + // CK34-DAG: [[MF3:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 3 + + // CK34-DAG: [[BPC3:%.+]] = bitcast i8** [[BP3]] to %class.S** + // CK34-DAG: [[PC3:%.+]] = bitcast i8** [[P3]] to %class.C** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC3]], + // CK34-DAG: store %class.C* [[C_ADDR:%.+]], %class.C** [[PC3]], + // CK34-64-DAG: store i64 16, i64* [[S3]], + // CK34-32-DAG: store i64 8, i64* [[S3]], + // CK34-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[C_DEFAULT_MAPPER:@.+]] to i8*), i8** [[MF3]], + + // CK34-64-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 2 + // CK34-32-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 1 + + #pragma omp target map(to: s) + s.foo(); + + // CK34 : call void + + // CK34-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}, i8** null, i8** [[GEPMF:%.+]]) + // CK34-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK34-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK34-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + // CK34-DAG: [[GEPMF]] = bitcast [4 x i8*]* [[MF:%.+]] to i8** + + // pass TARGET_PARAM {&s, &s, ((void*)(&s+1)-(void*)&s)} + + // CK34-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK34-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK34-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK34-DAG: [[MF0:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 0 + + // CK34-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to %class.S** + // CK34-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to %class.S** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC0]], + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[PC0]], + // CK34-DAG: store i64 [[S_SIZE:%.+]], i64* [[S0]], + // CK34-DAG: store i8* null, i8** [[MF0]], + + // CK34-DAG: [[S_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) + // CK34-DAG: [[SZ]] = sub i64 [[S_1_INTPTR:%.+]], [[S_INTPTR:%.+]] + // CK34-DAG: [[S_1_INTPTR]] = ptrtoint i8* [[S_1_VOID:%.+]] to i64 + // CK34-DAG: [[S_INTPTR]] = ptrtoint i8* [[S_VOID:%.+]] to i64 + // CK34-DAG: [[S_1_VOID]] = bitcast %class.S* [[S_1:%.+]] to i8* + // CK34-DAG: [[S_VOID]] = bitcast %class.S* [[S_ADDR]] to i8* + // CK34-DAG: [[S_1]] = getelementptr %class.S, %class.S* [[S_ADDR]], i32 1 + + // pass MEMBER_OF_1 | FROM {&s, &s, ((void*)(&s.a+1)-(void*)&s)} to copy the data of s.a. + + // CK34-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK34-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK34-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK34-DAG: [[MF1:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 1 + + // CK34-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to %class.S** + // CK34-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to %class.S** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC1]], + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[PC1]], + // CK34-DAG: store i64 [[A_SIZE:%.+]], i64* [[S1]], + // CK34-DAG: store i8* null, i8** [[MF1]], + + // CK34-DAG: [[A_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) + // CK34-DAG: [[SZ]] = sub i64 [[C_BEGIN_INTPTR:%.+]], [[S_INTPTR:%.+]] + // CK34-DAG: [[S_INTPTR]] = ptrtoint i8* [[S_VOID:%.+]] to i64 + // CK34-DAG: [[C_BEGIN_INTPTR]] = ptrtoint i8* [[C_BEGIN_VOID:%.+]] to i64 + // CK34-DAG: [[S_VOID]] = bitcast %class.S* [[S_ADDR]] to i8* + // CK34-DAG: [[C_BEGIN_VOID]] = bitcast %class.C* [[C_ADDR:%.+]] to i8* + // CK34-64-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 2 + // CK34-32-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 1 + + // pass MEMBER_OF_1 | FROM {&s, &s.c+1, ((void*)(&s)+31+1-(void*)(&s.c+1))} to copy the data of s.b. + + // CK34-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK34-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK34-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK34-DAG: [[MF2:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 2 + + // CK34-DAG: [[BPC2:%.+]] = bitcast i8** [[BP2]] to %class.S** + // CK34-DAG: [[PC2:%.+]] = bitcast i8** [[P2]] to %class.C** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC2]], + // CK34-DAG: store %class.C* [[C_END:%.+]], %class.C** [[PC2]], + // CK34-DAG: store i64 [[B_SIZE:%.+]], i64* [[S2]], + // CK34-DAG: store i8* null, i8** [[MF2]], + + // CK34-DAG: [[C_END]] = getelementptr %class.C, %class.C* [[C_ADDR]], i{{.+}} 1 + + // CK34-DAG: [[B_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) + // CK34-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[C_END_INTPTR:%.+]] + // CK34-DAG: [[C_END_INTPTR]] = ptrtoint i8* [[C_END_VOID:%.+]] to i64 + // CK34-DAG: [[S_END_INTPTR]] = ptrtoint i8* [[S_END_VOID:%.+]] to i64 + // CK34-DAG: [[C_END_VOID]] = bitcast %class.C* [[C_END]] to i8* + // CK34-DAG: [[S_END_VOID]] = getelementptr i8, i8* [[S_LAST:%.+]], i{{.+}} 1 + // CK34-64-DAG: [[S_LAST]] = getelementptr i8, i8* [[S_VOID:%.+]], i64 31 + // CK34-32-DAG: [[S_LAST]] = getelementptr i8, i8* [[S_VOID:%.+]], i32 15 + // CK34-DAG: [[S_VOID]] = bitcast %class.S* [[S_ADDR]] to i8* + + // pass MEMBER_OF_1 | FROM | IMPLICIT | MAPPER {&s, &s.c, 16} to copy the data of s.c. + + // CK34-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 + // CK34-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 + // CK34-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3 + // CK34-DAG: [[MF3:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 3 + + // CK34-DAG: [[BPC3:%.+]] = bitcast i8** [[BP3]] to %class.S** + // CK34-DAG: [[PC3:%.+]] = bitcast i8** [[P3]] to %class.C** + + // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC3]], + // CK34-DAG: store %class.C* [[C_ADDR:%.+]], %class.C** [[PC3]], + // CK34-64-DAG: store i64 16, i64* [[S3]], + // CK34-32-DAG: store i64 8, i64* [[S3]], + // CK34-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[C_DEFAULT_MAPPER]] to i8*), i8** [[MF3]], + + // CK34-64-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 2 + // CK34-32-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 1 + + #pragma omp target map(from: s) + s.foo(); +} + +#endif // CK34 +#endif diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -269,10 +269,11 @@ MapperArgNames[I] = C.Name; } - int rc = target_data_function( - loc, Device, MapperComponents.Components.size(), MapperArgsBase.data(), - MapperArgs.data(), MapperArgSizes.data(), MapperArgTypes.data(), - MapperArgNames.data(), /*arg_mappers*/ nullptr, AsyncInfo); + int rc = target_data_function(loc, Device, MapperComponents.Components.size(), + MapperArgsBase.data(), MapperArgs.data(), + MapperArgSizes.data(), MapperArgTypes.data(), + MapperArgNames.data(), /*arg_mappers*/ nullptr, + AsyncInfo, /*FromMapper=*/true); return rc; } @@ -281,7 +282,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, map_var_info_t *arg_names, - void **arg_mappers, AsyncInfoTy &AsyncInfo) { + void **arg_mappers, AsyncInfoTy &AsyncInfo, + bool FromMapper) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { // Ignore private variables and arrays - there is no mapping for them. @@ -379,7 +381,10 @@ Pointer_HstPtrBegin = HstPtrBase; // modify current entry. HstPtrBase = *(void **)HstPtrBase; - UpdateRef = true; // subsequently update ref count of pointee + // No need to update pointee ref count for the first element of the + // subelement that comes from mapper. + UpdateRef = + (!FromMapper || i != 0); // subsequently update ref count of pointee } void *TgtPtrBegin = Device.getOrAllocTgtPtr( @@ -483,7 +488,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, - void **ArgMappers, AsyncInfoTy &AsyncInfo) { + void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { int Ret; std::vector DeallocTgtPtrs; // process each input. @@ -536,7 +541,8 @@ bool IsLast, IsHostPtr; bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT; bool UpdateRef = !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) || - (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); + (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && + (!FromMapper || I != ArgNum - 1)); bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE; bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE; bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; @@ -584,8 +590,13 @@ bool DelEntry = IsLast || ForceDelete; - if ((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && - !(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { + // If the last element from the mapper (for end transfer args comes in + // reverse order), do not remove the partial entry, the parent struct still + // exists. + if (((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && + !(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) || + (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && FromMapper && + I == ArgNum - 1)) { DelEntry = false; // protect parent struct from being deallocated } @@ -822,7 +833,7 @@ int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, - void **ArgMappers, AsyncInfoTy &AsyncInfo) { + void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) { // process each input. for (int32_t I = 0; I < ArgNum; ++I) { if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -23,17 +23,20 @@ extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, map_var_info_t *arg_names, - void **arg_mappers, AsyncInfoTy &AsyncInfo); + void **arg_mappers, AsyncInfoTy &AsyncInfo, + bool FromMapper = false); extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *arg_names, - void **ArgMappers, AsyncInfoTy &AsyncInfo); + void **ArgMappers, AsyncInfoTy &AsyncInfo, + bool FromMapper = false); extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, map_var_info_t *arg_names, - void **arg_mappers, AsyncInfoTy &AsyncInfo); + void **arg_mappers, AsyncInfoTy &AsyncInfo, + bool FromMapper = false); extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, @@ -76,7 +79,8 @@ // targetDataEnd and targetDataUpdate). typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **, void **, int64_t *, int64_t *, - map_var_info_t *, void **, AsyncInfoTy &); + map_var_info_t *, void **, AsyncInfoTy &, + bool); // Implemented in libomp, they are called from within __tgt_* functions. #ifdef __cplusplus diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp @@ -0,0 +1,63 @@ +// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda + +#include +#include + +typedef struct { + int a; + double *b; +} C1; +#pragma omp declare mapper(C1 s) map(to : s.a) map(from : s.b [0:2]) + +typedef struct { + int a; + double *b; + C1 c; +} C; +#pragma omp declare mapper(C s) map(to : s.a, s.c) map(from : s.b [0:2]) + +typedef struct { + int e; + C f; + int h; +} D; + +int main() { + constexpr int N = 10; + D s; + s.e = 111; + s.f.a = 222; + s.f.c.a = 777; + double x[2]; + double x1[2]; + x[1] = 20; + s.f.b = &x[0]; + s.f.c.b = &x1[0]; + s.h = N; + + D *sp = &s; + D **spp = &sp; + + printf("%d %d %d %4.5f %d\n", spp[0][0].e, spp[0][0].f.a, spp[0][0].f.c.a, + spp[0][0].f.b[1], spp[0][0].f.b == &x[0] ? 1 : 0); + // CHECK: 111 222 777 20.00000 1 + + __intptr_t p = reinterpret_cast<__intptr_t>(&x[0]); +#pragma omp target map(tofrom : spp[0][0]) firstprivate(p) + { + printf("%d %d %d\n", spp[0][0].f.a, spp[0][0].f.c.a, + spp[0][0].f.b == reinterpret_cast(p) ? 1 : 0); + // CHECK: 222 777 0 + spp[0][0].e = 333; + spp[0][0].f.a = 444; + spp[0][0].f.c.a = 555; + spp[0][0].f.b[1] = 40; + } + printf("%d %d %d %4.5f %d\n", spp[0][0].e, spp[0][0].f.a, spp[0][0].f.c.a, + spp[0][0].f.b[1], spp[0][0].f.b == &x[0] ? 1 : 0); + // CHECK: 333 222 777 40.00000 1 +}