Index: include/clang/AST/Stmt.h =================================================================== --- include/clang/AST/Stmt.h +++ include/clang/AST/Stmt.h @@ -1986,6 +1986,7 @@ enum VariableCaptureKind { VCK_This, VCK_ByRef, + VCK_ByCopy, VCK_VLAType, }; @@ -2005,21 +2006,7 @@ /// \param Var The variable being captured, or null if capturing this. /// Capture(SourceLocation Loc, VariableCaptureKind Kind, - VarDecl *Var = nullptr) - : VarAndKind(Var, Kind), Loc(Loc) { - switch (Kind) { - case VCK_This: - assert(!Var && "'this' capture cannot have a variable!"); - break; - case VCK_ByRef: - assert(Var && "capturing by reference must have a variable!"); - break; - case VCK_VLAType: - assert(!Var && - "Variable-length array type capture cannot have a variable!"); - break; - } - } + VarDecl *Var = nullptr); /// \brief Determine the kind of capture. VariableCaptureKind getCaptureKind() const { return VarAndKind.getInt(); } @@ -2031,9 +2018,14 @@ /// \brief Determine whether this capture handles the C++ 'this' pointer. bool capturesThis() const { return getCaptureKind() == VCK_This; } - /// \brief Determine whether this capture handles a variable. + /// \brief Determine whether this capture handles a variable (by reference). bool capturesVariable() const { return getCaptureKind() == VCK_ByRef; } + /// \brief Determine whether this capture handles a variable by copy. + bool capturesVariableByCopy() const { + return getCaptureKind() == VCK_ByCopy; + } + /// \brief Determine whether this capture handles a variable-length array /// type. bool capturesVariableArrayType() const { @@ -2044,7 +2036,7 @@ /// /// This operation is only valid if this capture captures a variable. VarDecl *getCapturedVar() const { - assert(capturesVariable() && + assert((capturesVariable() || capturesVariableByCopy()) && "No variable available for 'this' or VAT capture"); return VarAndKind.getPointer(); } Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -7751,6 +7751,12 @@ ExprResult VerifyPositiveIntegerConstantInClause(Expr *Op, OpenMPClauseKind CKind); public: + /// \brief Return true if the provided declaration \a VD should be captured by + /// reference in the provided scope \a RSI. This will take into account the + /// semantics of the directive and associated clauses. + bool IsOpenMPCapturedByRef(VarDecl *VD, + const sema::CapturedRegionScopeInfo *RSI); + /// \brief Check if the specified variable is used in one of the private /// clauses (private, firstprivate, lastprivate, reduction etc.) in OpenMP /// constructs. Index: lib/AST/Stmt.cpp =================================================================== --- lib/AST/Stmt.cpp +++ lib/AST/Stmt.cpp @@ -945,6 +945,33 @@ return new(C)SEHFinallyStmt(Loc,Block); } +CapturedStmt::Capture::Capture(SourceLocation Loc, VariableCaptureKind Kind, + VarDecl *Var) + : VarAndKind(Var, Kind), Loc(Loc) { + switch (Kind) { + case VCK_This: + assert(!Var && "'this' capture cannot have a variable!"); + break; + case VCK_ByRef: + assert(Var && "capturing by reference must have a variable!"); + break; + case VCK_ByCopy: + assert(Var && "capturing by copy must have a variable!"); + assert( + (Var->getType()->isScalarType() || (Var->getType()->isReferenceType() && + Var->getType() + ->castAs() + ->getPointeeType() + ->isScalarType())) && + "captures by copy are expected to have a scalar type!"); + break; + case VCK_VLAType: + assert(!Var && + "Variable-length array type capture cannot have a variable!"); + break; + } +} + CapturedStmt::Capture *CapturedStmt::getStoredCaptures() const { unsigned Size = sizeof(CapturedStmt) + sizeof(Stmt *) * (NumCaptures + 1); Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -3180,7 +3180,7 @@ CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true); + return CGF.GenerateOpenMPCapturedStmtFunction(CS); } void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, @@ -3195,6 +3195,10 @@ OMP_MAP_TO = 0x01, /// \brief Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, + /// \brief The element passed to the device is a pointer. + OMP_MAP_PTR = 0x20, + /// \brief Pass the element to the device by value. + OMP_MAP_BYCOPY = 0x80, }; enum OpenMPOffloadingReservedDeviceIDs { @@ -3203,6 +3207,8 @@ OMP_DEVICEID_UNDEF = -1, }; + auto &Ctx = CGF.getContext(); + // Fill up the arrays with the all the captured variables. SmallVector BasePointers; SmallVector Pointers; @@ -3225,27 +3231,61 @@ llvm::Value *Size; unsigned MapType; + // VLA sizes are passed to the outlined region by copy. if (CI->capturesVariableArrayType()) { BasePointer = Pointer = *CV; Size = getTypeSize(CGF, RI->getType()); + // Copy to the device as an argument. No need to retrieve it. + MapType = OMP_MAP_BYCOPY; hasVLACaptures = true; - // VLA sizes don't need to be copied back from the device. - MapType = OMP_MAP_TO; } else if (CI->capturesThis()) { BasePointer = Pointer = *CV; const PointerType *PtrTy = cast(RI->getType().getTypePtr()); Size = getTypeSize(CGF, PtrTy->getPointeeType()); // Default map type. MapType = OMP_MAP_TO | OMP_MAP_FROM; + } else if (CI->capturesVariableByCopy()) { + MapType = OMP_MAP_BYCOPY; + if (!RI->getType()->isAnyPointerType()) { + // If the field is not a pointer, we need to save the actual value and + // load it as a void pointer. + auto DstAddr = CGF.CreateMemTemp( + Ctx.getUIntPtrType(), + Twine(CI->getCapturedVar()->getName()) + ".casted"); + LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); + + auto *SrcAddrVal = CGF.EmitScalarConversion( + DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), + Ctx.getPointerType(RI->getType()), SourceLocation()); + LValue SrcLV = + CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); + + // Store the value using the source type pointer. + CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); + + // Load the value using the destination type pointer. + BasePointer = Pointer = + CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); + } else { + MapType |= OMP_MAP_PTR; + BasePointer = Pointer = *CV; + } + Size = getTypeSize(CGF, RI->getType()); } else { + assert(CI->capturesVariable() && "Expected captured reference."); BasePointer = Pointer = *CV; const ReferenceType *PtrTy = cast(RI->getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); Size = getTypeSize(CGF, ElementType); - // Default map type. - MapType = OMP_MAP_TO | OMP_MAP_FROM; + // The default map type for a scalar/complex type is 'to' because by + // default the value doesn't have to be retrieved. For an aggregate type, + // the default is 'tofrom'. + MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) + : OMP_MAP_TO; + if (ElementType->isAnyPointerType()) + MapType |= OMP_MAP_PTR; } BasePointers.push_back(BasePointer); @@ -3256,7 +3296,7 @@ // Keep track on whether the host function has to be executed. auto OffloadErrorQType = - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); + Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); auto OffloadError = CGF.MakeAddrLValue( CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), OffloadErrorQType); @@ -3264,7 +3304,7 @@ OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, + auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, hasVLACaptures, Device, OffloadError, OffloadErrorQType](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); @@ -3276,8 +3316,8 @@ if (PointerNumVal) { llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); - QualType PointerArrayType = CGF.getContext().getConstantArrayType( - CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal, + QualType PointerArrayType = Ctx.getConstantArrayType( + Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); BasePointersArray = @@ -3289,8 +3329,8 @@ // sizes, otherwise we need to fill up the arrays as we do for the // pointers. if (hasVLACaptures) { - QualType SizeArrayType = CGF.getContext().getConstantArrayType( - CGF.getContext().getSizeType(), PointerNumAP, ArrayType::Normal, + QualType SizeArrayType = Ctx.getConstantArrayType( + Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); @@ -3323,29 +3363,41 @@ MapTypesArray = MapTypesArrayGbl; for (unsigned i = 0; i < PointerNumVal; ++i) { + + llvm::Value *BPVal = BasePointers[i]; + if (BPVal->getType()->isPointerTy()) + BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); + else { + assert(BPVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); + } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 0, i); - Address BPAddr(BP, CGM.getContext().getTypeAlignInChars( - CGM.getContext().VoidPtrTy)); - CGF.Builder.CreateStore( - CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BPAddr); - + Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(BPVal, BPAddr); + + llvm::Value *PVal = Pointers[i]; + if (PVal->getType()->isPointerTy()) + PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); + else { + assert(PVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); + } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, i); - Address PAddr(P, CGM.getContext().getTypeAlignInChars( - CGM.getContext().VoidPtrTy)); - CGF.Builder.CreateStore( - CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), PAddr); + Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(PVal, PAddr); if (hasVLACaptures) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, /*Idx0=*/0, /*Idx1=*/i); - Address SAddr(S, CGM.getContext().getTypeAlignInChars( - CGM.getContext().getSizeType())); + Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( Sizes[i], CGM.SizeTy, /*isSigned=*/true), SAddr); Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -21,8 +21,7 @@ using namespace CodeGen; void CodeGenFunction::GenerateOpenMPCapturedVars( - const CapturedStmt &S, SmallVectorImpl &CapturedVars, - bool UseOnlyReferences) { + const CapturedStmt &S, SmallVectorImpl &CapturedVars) { const RecordDecl *RD = S.getCapturedRecordDecl(); auto CurField = RD->field_begin(); auto CurCap = S.captures().begin(); @@ -32,26 +31,46 @@ if (CurField->hasCapturedVLAType()) { auto VAT = CurField->getCapturedVLAType(); auto *Val = VLASizeMap[VAT->getSizeExpr()]; - // If we need to use only references, create a temporary location for the - // size of the VAT. - if (UseOnlyReferences) { - LValue LV = - MakeAddrLValue(CreateMemTemp(CurField->getType(), "__vla_size_ref"), - CurField->getType()); - EmitStoreThroughLValue(RValue::get(Val), LV); - Val = LV.getAddress().getPointer(); - } CapturedVars.push_back(Val); } else if (CurCap->capturesThis()) CapturedVars.push_back(CXXThisValue); - else + else if (CurCap->capturesVariableByCopy()) + CapturedVars.push_back( + EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); + else { + assert(CurCap->capturesVariable() && "Expected capture by reference."); CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); + } + } +} + +static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, + StringRef Name, LValue AddrLV, + bool isReferenceType = false) { + ASTContext &Ctx = CGF.getContext(); + + auto *CastedPtr = CGF.EmitScalarConversion( + AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), + Ctx.getPointerType(DstType), SourceLocation()); + auto TmpAddr = + CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) + .getAddress(); + + // If we are dealing with references we need to return the address of the + // reference instead of the reference of the value. + if (isReferenceType) { + QualType RefType = Ctx.getLValueReferenceType(DstType); + auto *RefVal = TmpAddr.getPointer(); + TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); + auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); + CGF.EmitScalarInit(RefVal, TmpLVal); } + + return TmpAddr; } llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, - bool UseOnlyReferences) { +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); @@ -69,7 +88,17 @@ QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; VarDecl *CapVar = nullptr; - if (I->capturesVariable()) { + + // If this is a capture by copy and the type is not a pointer, the outlined + // function argument type should be uintptr and the value properly casted to + // uintptr. This is necessary given that the runtime library is only able to + // deal with pointers. We can pass in the same way the VLA type sizes to the + // outlined function. + if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || + I->capturesVariableArrayType()) + ArgType = Ctx.getUIntPtrType(); + + if (I->capturesVariable() || I->capturesVariableByCopy()) { CapVar = I->getCapturedVar(); II = CapVar->getIdentifier(); } else if (I->capturesThis()) @@ -77,9 +106,6 @@ else { assert(I->capturesVariableArrayType()); II = &getContext().Idents.get("vla"); - if (UseOnlyReferences) - ArgType = getContext().getLValueReferenceType( - ArgType, /*SpelledAsLValue=*/false); } if (ArgType->isVariablyModifiedType()) ArgType = getContext().getVariableArrayDecayedType(ArgType); @@ -111,15 +137,24 @@ unsigned Cnt = CD->getContextParamPosition(); I = S.captures().begin(); for (auto *FD : RD->fields()) { + // If we are capturing a pointer by copy we don't need to do anything, just + // use the value that we get from the arguments. + if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { + setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); + ++Cnt, ++I; + continue; + } + LValue ArgLVal = MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), AlignmentSource::Decl); if (FD->hasCapturedVLAType()) { - if (UseOnlyReferences) - ArgLVal = EmitLoadOfReferenceLValue( - ArgLVal.getAddress(), ArgLVal.getType()->castAs()); + LValue CastedArgLVal = + MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), + Args[Cnt]->getName(), ArgLVal), + FD->getType(), AlignmentSource::Decl); auto *ExprArg = - EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); + EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); VLASizeMap[VAT->getSizeExpr()] = ExprArg; } else if (I->capturesVariable()) { @@ -132,6 +167,15 @@ } setAddrOfLocalVar( Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); + } else if (I->capturesVariableByCopy()) { + assert(!FD->getType()->isAnyPointerType() && + "Not expecting a captured pointer."); + auto *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + setAddrOfLocalVar(I->getCapturedVar(), + castValueFromUintptr(*this, FD->getType(), + Args[Cnt]->getName(), ArgLVal, + VarTy->isReferenceType())); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); @@ -2479,7 +2523,7 @@ const CapturedStmt &CS = *cast(S.getAssociatedStmt()); llvm::SmallVector CapturedVars; - GenerateOpenMPCapturedVars(CS, CapturedVars, /*UseOnlyReferences=*/true); + GenerateOpenMPCapturedVars(CS, CapturedVars); // Emit target region as a standalone region. auto &&CodeGen = [&CS](CodeGenFunction &CGF) { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2200,12 +2200,9 @@ llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); - llvm::Function * - GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, - bool UseOnlyReferences = false); + llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S); void GenerateOpenMPCapturedVars(const CapturedStmt &S, - SmallVectorImpl &CapturedVars, - bool UseOnlyReferences = false); + SmallVectorImpl &CapturedVars); /// \brief Perform element by element copying of arrays with type \a /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure /// generated by \a CopyGen. Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -12621,10 +12621,15 @@ // Compute the type of an expression that refers to this variable. DeclRefType = CaptureType.getNonReferenceType(); - + + // Similarly to mutable captures in lambda, all the OpenMP captures by copy + // are mutable in the sense that user can change their value - they are + // private instances of the captured declarations. const CapturingScopeInfo::Capture &Cap = CSI->getCapture(Var); if (Cap.isCopyCapture() && - !(isa(CSI) && cast(CSI)->Mutable)) + !(isa(CSI) && cast(CSI)->Mutable) && + !(isa(CSI) && + cast(CSI)->CapRegionKind == CR_OpenMP)) DeclRefType.addConst(); return true; } @@ -12812,9 +12817,17 @@ // By default, capture variables by reference. bool ByRef = true; // Using an LValue reference type is consistent with Lambdas (see below). - if (S.getLangOpts().OpenMP && S.IsOpenMPCapturedVar(Var)) - DeclRefType = DeclRefType.getUnqualifiedType(); - CaptureType = S.Context.getLValueReferenceType(DeclRefType); + if (S.getLangOpts().OpenMP) { + ByRef = S.IsOpenMPCapturedByRef(Var, RSI); + if (S.IsOpenMPCapturedVar(Var)) + DeclRefType = DeclRefType.getUnqualifiedType(); + } + + if (ByRef) + CaptureType = S.Context.getLValueReferenceType(DeclRefType); + else + CaptureType = DeclRefType; + Expr *CopyExpr = nullptr; if (BuildAndDiagnose) { // The current implementation assumes that all variables are captured Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -222,6 +222,8 @@ return Stack[Stack.size() - 2].Directive; return OMPD_unknown; } + /// \brief Return the directive associated with the provided scope. + OpenMPDirectiveKind getDirectiveForScope(const Scope *S) const; /// \brief Set default data sharing attribute to none. void setDefaultDSANone(SourceLocation Loc) { @@ -728,12 +730,107 @@ return false; } +OpenMPDirectiveKind DSAStackTy::getDirectiveForScope(const Scope *S) const { + for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I) + if (I->CurScope == S) + return I->Directive; + return OMPD_unknown; +} + void Sema::InitDataSharingAttributesStack() { VarDataSharingAttributesStack = new DSAStackTy(*this); } #define DSAStack static_cast(VarDataSharingAttributesStack) +bool Sema::IsOpenMPCapturedByRef(VarDecl *VD, + const CapturedRegionScopeInfo *RSI) { + assert(LangOpts.OpenMP && "OpenMP is not allowed"); + + auto &Ctx = getASTContext(); + bool IsByRef = true; + + // Find the directive that is associated with the provided scope. + auto DKind = DSAStack->getDirectiveForScope(RSI->TheScope); + auto Ty = VD->getType(); + + if (isOpenMPTargetDirective(DKind)) { + // This table summarizes how a given variable should be passed to the device + // given its type and the clauses where it appears. This table is based on + // the description in OpenMP 4.5 [2.10.4, target Construct] and + // OpenMP 4.5 [2.15.5, Data-mapping Attribute Rules and Clauses]. + // + // ========================================================================= + // | type | defaultmap | pvt | first | is_device_ptr | map | res. | + // | |(tofrom:scalar)| | pvt | | | | + // ========================================================================= + // | scl | | | | - | | bycopy| + // | scl | | - | x | - | - | bycopy| + // | scl | | x | - | - | - | null | + // | scl | x | | | - | | byref | + // | scl | x | - | x | - | - | bycopy| + // | scl | x | x | - | - | - | null | + // | scl | | - | - | - | x | byref | + // | scl | x | - | - | - | x | byref | + // + // | agg | n.a. | | | - | | byref | + // | agg | n.a. | - | x | - | - | byref | + // | agg | n.a. | x | - | - | - | null | + // | agg | n.a. | - | - | - | x | byref | + // | agg | n.a. | - | - | - | x[] | byref | + // + // | ptr | n.a. | | | - | | bycopy| + // | ptr | n.a. | - | x | - | - | bycopy| + // | ptr | n.a. | x | - | - | - | null | + // | ptr | n.a. | - | - | - | x | byref | + // | ptr | n.a. | - | - | - | x[] | bycopy| + // | ptr | n.a. | - | - | x | | bycopy| + // | ptr | n.a. | - | - | x | x | bycopy| + // | ptr | n.a. | - | - | x | x[] | bycopy| + // ========================================================================= + // Legend: + // scl - scalar + // ptr - pointer + // agg - aggregate + // x - applies + // - - invalid in this combination + // [] - mapped with an array section + // byref - should be mapped by reference + // byval - should be mapped by value + // null - initialize a local variable to null on the device + // + // Observations: + // - All scalar declarations that show up in a map clause have to be passed + // by reference, because they may have been mapped in the enclosing data + // environment. + // - If the scalar value does not fit the size of uintptr, it has to be + // passed by reference, regardless the result in the table above. + // - For pointers mapped by value that have either an implicit map or an + // array section, the runtime library may pass the NULL value to the + // device instead of the value passed to it by the compiler. + + // FIXME: Right now, only implicit maps are implemented. Properly mapping + // values requires having the map, private, and firstprivate clauses SEMA + // and parsing in place, which we don't yet. + + if (Ty->isReferenceType()) + Ty = Ty->castAs()->getPointeeType(); + IsByRef = !Ty->isScalarType(); + } + + // When passing data by value, we need to make sure it fits the uintptr size + // and alignment, because the runtime library only deals with uintptr types. + // If it does not fit the uintptr size, we need to pass the data by reference + // instead. + if (!IsByRef && + (Ctx.getTypeSizeInChars(Ty) > + Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) || + Ctx.getDeclAlign(VD) > Ctx.getTypeAlignInChars(Ctx.getUIntPtrType()))) + IsByRef = true; + + return IsByRef; +} + bool Sema::IsOpenMPCapturedVar(VarDecl *VD) { assert(LangOpts.OpenMP && "OpenMP is not allowed"); VD = VD->getCanonicalDecl(); @@ -4951,7 +5048,13 @@ if (!AStmt) return StmtError(); - assert(isa(AStmt) && "Captured statement expected"); + CapturedStmt *CS = cast(AStmt); + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. + CS->getCapturedDecl()->setNothrow(); // OpenMP [2.16, Nesting of Regions] // If specified, a teams construct must be contained within a target Index: lib/Sema/SemaStmt.cpp =================================================================== --- lib/Sema/SemaStmt.cpp +++ lib/Sema/SemaStmt.cpp @@ -3803,11 +3803,10 @@ continue; } - assert(Cap->isReferenceCapture() && - "non-reference capture not yet implemented"); - Captures.push_back(CapturedStmt::Capture(Cap->getLocation(), - CapturedStmt::VCK_ByRef, + Cap->isReferenceCapture() + ? CapturedStmt::VCK_ByRef + : CapturedStmt::VCK_ByCopy, Cap->getVariable())); CaptureInits.push_back(Cap->getInitExpr()); } Index: test/OpenMP/target_codegen.cpp =================================================================== --- test/OpenMP/target_codegen.cpp +++ test/OpenMP/target_codegen.cpp @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -16,15 +16,15 @@ // sizes. // CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] -// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 128] // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] -// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] -// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 128, i32 128] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 128, i32 3, i32 128, i32 3, i32 3, i32 128, i32 128, i32 3, i32 3] // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 128, i32 128, i32 3] // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] -// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 128, i32 128, i32 128, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 128, i32 128, i32 128, i32 3] // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 @@ -66,7 +66,7 @@ // CHECK: store i32 -1, i32* [[RHV]], align 4 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 - // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}) #pragma omp target if(0) { a += 1; @@ -79,15 +79,15 @@ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] - // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* - // CHECK-DAG: [[P0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8* + // CHECK-DAG: [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8* // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] - // CHECK: call void [[HVT2:@.+]](i16* {{[^,]+}}) + // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] #pragma omp target if(1) @@ -106,15 +106,15 @@ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] - // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* - // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8* + // CHECK-DAG: [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8* // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 // CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]] // CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]] - // CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8* - // CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] %{{.+}} to i8* + // CHECK-DAG: [[P1]] = inttoptr i[[SZ]] %{{.+}} to i8* // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 // CHECK-NEXT: br label %[[IFEND:.+]] @@ -137,12 +137,17 @@ } // We capture 3 VLA sizes in this target region - // CHECK: store i[[SZ]] [[BNELEMSIZE:%.+]], i[[SZ]]* [[VLA0:%[^,]+]] - // CHECK: store i[[SZ]] 5, i[[SZ]]* [[VLA1:%[^,]+]] - // CHECK: store i[[SZ]] [[CNELEMSIZE1:%.+]], i[[SZ]]* [[VLA2:%[^,]+]] + // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32* + // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]], + // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], - // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[BNELEMSIZE]], 4 - // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[CNELEMSIZE1]] + // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 + // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 @@ -183,26 +188,24 @@ // The names below are not necessarily consistent with the names used for the // addresses above as some are repeated. - // CHECK-DAG: [[BP0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8* - // CHECK-DAG: [[P0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8* + // CHECK-DAG: [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8* + // CHECK-DAG: [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8* // CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}} // CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} - // CHECK-DAG: [[BP1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8* - // CHECK-DAG: [[P1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8* + // CHECK-DAG: [[BP1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8* + // CHECK-DAG: [[P1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8* // CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}} // CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} - // CHECK-DAG: [[BP2:%[^,]+]] = bitcast i[[SZ]]* [[VLA2]] to i8* - // CHECK-DAG: [[P2:%[^,]+]] = bitcast i[[SZ]]* [[VLA2]] to i8* - // CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}} - // CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}} + // CHECK-DAG: store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} - // CHECK-DAG: [[BP3:%[^,]+]] = bitcast i32* %{{.+}} to i8* - // CHECK-DAG: [[P3:%[^,]+]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[BP3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8* + // CHECK-DAG: [[P3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8* // CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}} // CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} @@ -265,67 +268,67 @@ // CHECK: define internal void [[HVT0]]() -// CHECK: define internal void [[HVT1]](i32* dereferenceable(4) %{{.+}}) +// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. -// CHECK: [[A_ADDR:%.+]] = alloca i32*, align -// CHECK: store i32* %{{.+}}, i32** [[A_ADDR]], align -// CHECK: [[A_ADDR2:%.+]] = load i32*, i32** [[A_ADDR]], align -// CHECK: load i32, i32* [[A_ADDR2]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: load i32, i32* [[AA_CADDR]], align +// CHECK-32: load i32, i32* [[AA_ADDR]], align -// CHECK: define internal void [[HVT2]](i16* dereferenceable(2) %{{.+}}) +// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. -// CHECK: [[AA_ADDR:%.+]] = alloca i16*, align -// CHECK: store i16* %{{.+}}, i16** [[AA_ADDR]], align -// CHECK: [[AA_ADDR2:%.+]] = load i16*, i16** [[AA_ADDR]], align -// CHECK: load i16, i16* [[AA_ADDR2]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: load i16, i16* [[AA_CADDR]], align // CHECK: define internal void [[HVT3]] // Create stack storage and store argument in there. -// CHECK-DAG: [[A_ADDR:%.+]] = alloca i32*, align -// CHECK-DAG: [[AA_ADDR:%.+]] = alloca i16*, align -// CHECK-DAG: store i32* %{{.+}}, i32** [[A_ADDR]], align -// CHECK-DAG: store i16* %{{.+}}, i16** [[AA_ADDR]], align -// CHECK-DAG: [[A_ADDR2:%.+]] = load i32*, i32** [[A_ADDR]], align -// CHECK-DAG: [[AA_ADDR2:%.+]] = load i16*, i16** [[AA_ADDR]], align -// CHECK-DAG: load i32, i32* [[A_ADDR2]], align -// CHECK-DAG: load i16, i16* [[AA_ADDR2]], align +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK-64-DAG:load i32, i32* [[A_CADDR]], align +// CHECK-32-DAG:load i32, i32* [[A_ADDR]], align +// CHECK-DAG: load i16, i16* [[AA_CADDR]], align // CHECK: define internal void [[HVT4]] // Create local storage for each capture. -// CHECK-DAG: [[LOCAL_A:%.+]] = alloca i32* -// CHECK-DAG: [[LOCAL_B:%.+]] = alloca [10 x float]* -// CHECK-DAG: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]* -// CHECK-DAG: [[LOCAL_BN:%.+]] = alloca float* -// CHECK-DAG: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* -// CHECK-DAG: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]* -// CHECK-DAG: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]* -// CHECK-DAG: [[LOCAL_CN:%.+]] = alloca double* -// CHECK-DAG: [[LOCAL_D:%.+]] = alloca [[TT]]* -// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCAL_A]] +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_BN:%.+]] = alloca float* +// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_CN:%.+]] = alloca double* +// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]* +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] // CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]] -// CHECK-DAG: store i[[SZ]]* [[ARG_VLA1:%.+]], i[[SZ]]** [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] // CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]] // CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]] -// CHECK-DAG: store i[[SZ]]* [[ARG_VLA2:%.+]], i[[SZ]]** [[LOCAL_VLA2]] -// CHECK-DAG: store i[[SZ]]* [[ARG_VLA3:%.+]], i[[SZ]]** [[LOCAL_VLA3]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]] // CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]] // CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]] -// CHECK-DAG: [[REF_A:%.+]] = load i32*, i32** [[LOCAL_A]], +// CHECK-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* // CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]], -// CHECK-DAG: [[REF_VLA1:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA1]], -// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA1]], +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], // CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]], // CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]], -// CHECK-DAG: [[REF_VLA2:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA2]], -// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA2]], -// CHECK-DAG: [[REF_VLA3:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA3]], -// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA3]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]], // CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], // CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], // Use captures. -// CHECK-DAG: load i32, i32* [[REF_A]] +// CHECK-64-DAG: load i32, i32* [[REF_A]] +// CHECK-32-DAG: load i32, i32* [[LOCAL_A]] // CHECK-DAG: getelementptr inbounds [10 x float], [10 x float]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 // CHECK-DAG: getelementptr inbounds float, float* [[REF_BN]], i[[SZ]] 3 // CHECK-DAG: getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] 0, i[[SZ]] 1 @@ -406,10 +409,16 @@ // // CHECK: define {{.*}}[[FS1]] // +// CHECK: i8* @llvm.stacksave() +// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32* +// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], +// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], + +// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], +// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], + // We capture 2 VLA sizes in this target region -// CHECK: store i[[SZ]] 2, i[[SZ]]* [[VLA0:%[^,]+]] -// CHECK: store i[[SZ]] [[CELEMSIZE1:%.+]], i[[SZ]]* [[VLA1:%[^,]+]] -// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[CELEMSIZE1]] +// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 @@ -434,20 +443,18 @@ // The names below are not necessarily consistent with the names used for the // addresses above as some are repeated. -// CHECK-DAG: [[BP0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8* -// CHECK-DAG: [[P0:%[^,]+]] = bitcast i[[SZ]]* [[VLA0]] to i8* +// CHECK-DAG: [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8* +// CHECK-DAG: [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8* // CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}} // CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} -// CHECK-DAG: [[BP1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8* -// CHECK-DAG: [[P1:%[^,]+]] = bitcast i[[SZ]]* [[VLA1]] to i8* -// CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}} -// CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}} +// CHECK-DAG: store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}} +// CHECK-DAG: store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} -// CHECK-DAG: [[BP2:%[^,]+]] = bitcast i32* %{{.+}} to i8* -// CHECK-DAG: [[P2:%[^,]+]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: [[BP2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8* +// CHECK-DAG: [[P2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8* // CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}} // CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}} // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} @@ -488,15 +495,15 @@ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0 // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] -// CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* -// CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8* +// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8* // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1 // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1 // CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]] // CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]] -// CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8* -// CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8* +// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8* +// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8* // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2 // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2 @@ -540,15 +547,15 @@ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] -// CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* -// CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8* +// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8* // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 // CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]] // CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]] -// CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8* -// CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8* +// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8* +// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8* // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 @@ -580,65 +587,66 @@ // CHECK: define internal void [[HVT7]] // Create local storage for each capture. -// CHECK-DAG: [[LOCAL_THIS:%.+]] = alloca [[S1]]* -// CHECK-DAG: [[LOCAL_B:%.+]] = alloca i32* -// CHECK-DAG: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]* -// CHECK-DAG: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]* -// CHECK-DAG: [[LOCAL_C:%.+]] = alloca i16* +// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]* +// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_C:%.+]] = alloca i16* // CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]] -// CHECK-DAG: store i32* [[ARG_B:%.+]], i32** [[LOCAL_B]] -// CHECK-DAG: store i[[SZ]]* [[ARG_VLA1:%.+]], i[[SZ]]** [[LOCAL_VLA1]] -// CHECK-DAG: store i[[SZ]]* [[ARG_VLA2:%.+]], i[[SZ]]** [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] // CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]] // Store captures in the context. // CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]], -// CHECK-DAG: [[REF_B:%.+]] = load i32*, i32** [[LOCAL_B]], -// CHECK-DAG: [[REF_VLA1:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA1]], -// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA1]], -// CHECK-DAG: [[REF_VLA2:%.+]] = load i[[SZ]]*, i[[SZ]]** [[LOCAL_VLA2]], -// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[REF_VLA2]], +// CHECK-64-DAG:[[REF_B:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32* +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], // CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], // Use captures. // CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0 -// CHECK-DAG: load i32, i32* [[REF_B]] +// CHECK-64-DAG:load i32, i32* [[REF_B]] +// CHECK-32-DAG:load i32, i32* [[LOCAL_B]] // CHECK-DAG: getelementptr inbounds i16, i16* [[REF_C]], i[[SZ]] %{{.+}} // CHECK: define internal void [[HVT6]] // Create local storage for each capture. -// CHECK-DAG: [[LOCAL_A:%.+]] = alloca i32* -// CHECK-DAG: [[LOCAL_AA:%.+]] = alloca i16* -// CHECK-DAG: [[LOCAL_AAA:%.+]] = alloca i8* -// CHECK-DAG: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCAL_A]] -// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCAL_AA]] -// CHECK-DAG: store i8* [[ARG_AAA:%.+]], i8** [[LOCAL_AAA]] +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] // CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. -// CHECK-DAG: [[REF_A:%.+]] = load i32*, i32** [[LOCAL_A]], -// CHECK-DAG: [[REF_AA:%.+]] = load i16*, i16** [[LOCAL_AA]], -// CHECK-DAG: [[REF_AAA:%.+]] = load i8*, i8** [[LOCAL_AAA]], -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], +// CHECK-64-DAG: [[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[REF_AAA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], // Use captures. -// CHECK-DAG: load i32, i32* [[REF_A]] -// CHECK-DAG: load i16, i16* [[REF_AA]] -// CHECK-DAG: load i8, i8* [[REF_AAA]] -// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 +// CHECK-64-DAG: load i32, i32* [[REF_A]] +// CHECK-DAG: load i16, i16* [[REF_AA]] +// CHECK-DAG: load i8, i8* [[REF_AAA]] +// CHECK-32-DAG: load i32, i32* [[LOCAL_A]] +// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 // CHECK: define internal void [[HVT5]] // Create local storage for each capture. -// CHECK-DAG: [[LOCAL_A:%.+]] = alloca i32* -// CHECK-DAG: [[LOCAL_AA:%.+]] = alloca i16* -// CHECK-DAG: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCAL_A]] -// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCAL_AA]] +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] // CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. -// CHECK-DAG: [[REF_A:%.+]] = load i32*, i32** [[LOCAL_A]], -// CHECK-DAG: [[REF_AA:%.+]] = load i16*, i16** [[LOCAL_AA]], +// CHECK-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* // CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], // Use captures. -// CHECK-DAG: load i32, i32* [[REF_A]] +// CHECK-64-DAG: load i32, i32* [[REF_A]] +// CHECK-32-DAG: load i32, i32* [[LOCAL_A]] // CHECK-DAG: load i16, i16* [[REF_AA]] // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 #endif Index: test/OpenMP/target_codegen_global_capture.cpp =================================================================== --- test/OpenMP/target_codegen_global_capture.cpp +++ test/OpenMP/target_codegen_global_capture.cpp @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -41,27 +41,88 @@ static float Sc = 7.0; static float Sd = 8.0; - // CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8* - // CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]], align - // CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8* - // CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]], align - // CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8* - // CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]], align - // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align - // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align - // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align - // CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]], align - // CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]], align - // CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]], align - // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[VALLB:%.+]] = load i16, i16* [[LB]], + // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb, + // CHECK-DAG: [[VALFB:%.+]] = load float, float* @_ZZ3foossssE2Sb, + // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc, + // CHECK-DAG: [[VALLC:%.+]] = load i16, i16* [[LC]], + // CHECK-DAG: [[VALFC:%.+]] = load float, float* @_ZZ3foossssE2Sc, + // CHECK-DAG: [[VALLD:%.+]] = load i16, i16* [[LD]], + // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* @Gd, + // CHECK-DAG: [[VALFD:%.+]] = load float, float* @_ZZ3foossssE2Sd, + + // 3 local vars being captured. + + // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]], + // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16* + // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]], + // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8* + // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]], + // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]], + // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16* + // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]], + // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8* + // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]], + // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]], + // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16* + // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]], + // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8* + // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]], + // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // 3 static vars being captured. + + // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]], + // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float* + // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]], + // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8* + // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]], + // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]], + // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float* + // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]], + // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8* + // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]], + // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]], + // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float* + // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]], + // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8* + // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]], + // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // 3 static global vars being captured. + + // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]], + // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double* + // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]], + // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8* + // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]], + // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]], + // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]], + // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double* + // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]], + // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8* + // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]], + // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]], + // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]], + // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double* + // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]], + // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8* + // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]], + // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]], + // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + // CHECK: call i32 @__tgt_target // CHECK: call void [[OFFLOADF:@.+]]( // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd @@ -71,7 +132,7 @@ Gb += 1.0; Sb += 1.0; - // CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}) + // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}) // The parallel region only uses 3 captures. // CHECK: call {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}) // CHECK: call void @.omp_outlined.(i32* %{{.+}}, i32* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}) @@ -106,45 +167,98 @@ // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}) // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i16* dereferenceable(2) [[A:%.+]], i16* dereferenceable(2) [[B:%.+]], i16* dereferenceable(2) [[C:%.+]], i16* dereferenceable(2) [[D:%.+]]) // Capture a, b, c, d + // CHECK: [[ALLOCLA:%.+]] = alloca i16 + // CHECK: [[ALLOCLB:%.+]] = alloca i16 + // CHECK: [[ALLOCLC:%.+]] = alloca i16 + // CHECK: [[ALLOCLD:%.+]] = alloca i16 + // CHECK: [[LLA:%.+]] = load i16*, i16** [[ALLOCLA]], + // CHECK: [[LLB:%.+]] = load i16*, i16** [[ALLOCLB]], + // CHECK: [[LLC:%.+]] = load i16*, i16** [[ALLOCLC]], + // CHECK: [[LLD:%.+]] = load i16*, i16** [[ALLOCLD]], #pragma omp parallel { - // CHECK: [[ADRA:%.+]] = alloca i16*, align - // CHECK: [[ADRB:%.+]] = alloca i16*, align - // CHECK: [[ADRC:%.+]] = alloca i16*, align - // CHECK: [[ADRD:%.+]] = alloca i16*, align - // CHECK: store i16* [[A]], i16** [[ADRA]], align - // CHECK: store i16* [[B]], i16** [[ADRB]], align - // CHECK: store i16* [[C]], i16** [[ADRC]], align - // CHECK: store i16* [[D]], i16** [[ADRD]], align - // CHECK: [[REFA:%.+]] = load i16*, i16** [[ADRA]], - // CHECK: [[REFB:%.+]] = load i16*, i16** [[ADRB]], - // CHECK: [[REFC:%.+]] = load i16*, i16** [[ADRC]], - // CHECK: [[REFD:%.+]] = load i16*, i16** [[ADRD]], - - // CHECK: load float, float* [[BA]] - - // CHECK-DAG: [[CSTB:%.+]] = bitcast i16* [[REFB]] to i8* - // CHECK-DAG: [[CSTC:%.+]] = bitcast i16* [[REFC]] to i8* - // CHECK-DAG: [[CSTD:%.+]] = bitcast i16* [[REFD]] to i8* - // CHECK-DAG: store i8* [[CSTB]], i8** [[GEPB:%.+]], align - // CHECK-DAG: store i8* [[CSTC]], i8** [[GEPC:%.+]], align - // CHECK-DAG: store i8* [[CSTD]], i8** [[GEPD:%.+]], align - // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align - // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align - // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align - // CHECK-DAG: store i8* bitcast (float* [[BB]] to i8*), i8** [[GEPBB:%.+]], align - // CHECK-DAG: store i8* bitcast (float* [[BC]] to i8*), i8** [[GEPBC:%.+]], align - // CHECK-DAG: store i8* bitcast (float* [[BD]] to i8*), i8** [[GEPBD:%.+]], align - - // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPBB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPBC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} - // CHECK-DAG: [[GEPBD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[VALLB:%.+]] = load i16, i16* [[LLB]], + // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* @Gb, + // CHECK-DAG: [[VALFB:%.+]] = load float, float* @_ZZ3barssssE2Sb, + // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* @Gc, + // CHECK-DAG: [[VALLC:%.+]] = load i16, i16* [[LLC]], + // CHECK-DAG: [[VALFC:%.+]] = load float, float* @_ZZ3barssssE2Sc, + // CHECK-DAG: [[VALLD:%.+]] = load i16, i16* [[LLD]], + // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* @Gd, + // CHECK-DAG: [[VALFD:%.+]] = load float, float* @_ZZ3barssssE2Sd, + + // 3 local vars being captured. + + // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]], + // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16* + // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]], + // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8* + // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]], + // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]], + // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16* + // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]], + // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8* + // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]], + // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]], + // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16* + // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]], + // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8* + // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]], + // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // 3 static vars being captured. + + // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]], + // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float* + // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]], + // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8* + // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]], + // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]], + // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float* + // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]], + // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8* + // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]], + // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]], + // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float* + // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]], + // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8* + // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]], + // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // 3 static global vars being captured. + + // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]], + // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double* + // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]], + // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8* + // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]], + // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]], + // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]], + // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double* + // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]], + // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8* + // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]], + // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]], + // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + + // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]], + // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double* + // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]], + // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8* + // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]], + // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]], + // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}} + // CHECK: call i32 @__tgt_target // CHECK: call void [[OFFLOADF:@.+]]( // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd @@ -154,7 +268,7 @@ Gb += 1.0; Sb += 1.0; - // CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}) + // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}) // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}) // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}) Index: test/OpenMP/target_map_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/target_map_codegen.cpp @@ -0,0 +1,1015 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +/// +/// Implicit maps. +/// + +///==========================================================================/// +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +// CK1-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK1-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK1-LABEL: implicit_maps_integer +void implicit_maps_integer (int a){ + int i = a; + + // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK1-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK1-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK1-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK1-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK1-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK1-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK1-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK1: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + ++i; + } +} + +// CK1: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK1: [[ADDR:%.+]] = alloca i[[sz]], +// CK1: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK1-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK1-64: {{.+}} = load i32, i32* [[CADDR]], +// CK1-32: {{.+}} = load i32, i32* [[ADDR]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK2-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK2-LABEL: implicit_maps_integer_reference +void implicit_maps_integer_reference (int a){ + int &i = a; + // CK2-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK2-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK2-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK2-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK2-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK2-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK2-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK2-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK2-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK2: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + ++i; + } +} + +// CK2: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK2: [[ADDR:%.+]] = alloca i[[sz]], +// CK2: [[REF:%.+]] = alloca i32*, +// CK2: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK2-64: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* +// CK2-64: store i32* [[CADDR]], i32** [[REF]], +// CK2-64: [[RVAL:%.+]] = load i32*, i32** [[REF]], +// CK2-64: {{.+}} = load i32, i32* [[RVAL]], +// CK2-32: store i32* [[ADDR]], i32** [[REF]], +// CK2-32: [[RVAL:%.+]] = load i32*, i32** [[REF]], +// CK2-32: {{.+}} = load i32, i32* [[RVAL]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK3-LABEL: implicit_maps_parameter +void implicit_maps_parameter (int a){ + + // CK3-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK3-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK3-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK3-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK3-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK3-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK3-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK3-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK3: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + ++a; + } +} + +// CK3: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK3: [[ADDR:%.+]] = alloca i[[sz]], +// CK3: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK3-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK3-64: {{.+}} = load i32, i32* [[CADDR]], +// CK3-32: {{.+}} = load i32, i32* [[ADDR]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 +#ifdef CK4 + +// CK4-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK4-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK4-LABEL: implicit_maps_nested_integer +void implicit_maps_nested_integer (int a){ + int i = a; + + // The captures in parallel are by reference. Only the capture in target is by + // copy. + + // CK4: call void {{.+}}@__kmpc_fork_call({{.+}} [[KERNELP1:@.+]] to void (i32*, i32*, ...)*), i32* {{.+}}) + // CK4: define internal void [[KERNELP1]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}}) + #pragma omp parallel + { + // CK4-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK4-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK4-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK4-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK4-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK4-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK4-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK4-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK4-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK4: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + #pragma omp parallel + { + ++i; + } + } + } +} + +// CK4: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK4: [[ADDR:%.+]] = alloca i[[sz]], +// CK4: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK4-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK4-64: call void {{.+}}@__kmpc_fork_call({{.+}} [[KERNELP2:@.+]] to void (i32*, i32*, ...)*), i32* [[CADDR]]) +// CK4-32: call void {{.+}}@__kmpc_fork_call({{.+}} [[KERNELP2:@.+]] to void (i32*, i32*, ...)*), i32* [[ADDR]]) +// CK4: define internal void [[KERNELP2]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}}) +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64 +// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64 +// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-32 +// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-32 +#ifdef CK5 + +// CK5-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK5-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK5-LABEL: implicit_maps_nested_integer_and_enum +void implicit_maps_nested_integer_and_enum (int a){ + enum Bla { + SomeEnum = 0x09 + }; + + // Using an enum should not change the mapping information. + int i = a; + + // CK5-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK5-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK5-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK5-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK5-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK5-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK5-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK5-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK5-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK5-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK5-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK5: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + ++i; + i += SomeEnum; + } +} + +// CK5: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK5: [[ADDR:%.+]] = alloca i[[sz]], +// CK5: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK5-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK5-64: {{.+}} = load i32, i32* [[CADDR]], +// CK5-32: {{.+}} = load i32, i32* [[ADDR]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK6 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-64 +// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-64 +// RUN: %clang_cc1 -DCK6 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-32 +// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-32 +#ifdef CK6 +// CK6-DAG: [[GBL:@Gi]] = global i32 0 +// CK6-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK6-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK6-LABEL: implicit_maps_host_global +int Gi; +void implicit_maps_host_global (int a){ + // CK6-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK6-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK6-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK6-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK6-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK6-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK6-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK6-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK6-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK6-64-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK6-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK6-64-DAG: store i32 [[GBLVAL:%.+]], i32* [[CADDR]], + // CK6-64-DAG: [[GBLVAL]] = load i32, i32* [[GBL]], + // CK6-32-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[GBLVAL:%.+]], + + // CK6: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + ++Gi; + } +} + +// CK6: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK6: [[ADDR:%.+]] = alloca i[[sz]], +// CK6: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK6-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK6-64: {{.+}} = load i32, i32* [[CADDR]], +// CK6-32: {{.+}} = load i32, i32* [[ADDR]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64 +// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64 +// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-32 +// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-32 +#ifdef CK7 + +// For a 32-bit targets, the value doesn't fit the size of the pointer, +// therefore it is passed by reference with a map 'to' specification. + +// CK7-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8] +// Map types: OMP_MAP_BYCOPY = 128 +// CK7-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] +// Map types: OMP_MAP_TO = 1 +// CK7-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 1] + +// CK7-LABEL: implicit_maps_double +void implicit_maps_double (int a){ + double d = (double)a; + + // CK7-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK7-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK7-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK7-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK7-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + + // CK7-64-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK7-64-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK7-64-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK7-64-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK7-64-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK7-64-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to double* + // CK7-64-64-DAG: store double {{.+}}, double* [[CADDR]], + + // CK7-32-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK7-32-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK7-32-DAG: [[VALBP]] = bitcast double* [[DECL:%.+]] to i8* + // CK7-32-DAG: [[VALP]] = bitcast double* [[DECL]] to i8* + + // CK7-64: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + // CK7-32: call void [[KERNEL:@.+]](double* [[DECL]]) + #pragma omp target + { + d += 1.0; + } +} + +// CK7-64: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK7-64: [[ADDR:%.+]] = alloca i[[sz]], +// CK7-64: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK7-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to double* +// CK7-64: {{.+}} = load double, double* [[CADDR]], + +// CK7-32: define internal void [[KERNEL]](double* {{.+}}[[ARG:%.+]]) +// CK7-32: [[ADDR:%.+]] = alloca double*, +// CK7-32: store double* [[ARG]], double** [[ADDR]], +// CK7-32: [[REF:%.+]] = load double*, double** [[ADDR]], +// CK7-32: {{.+}} = load double, double* [[REF]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK8 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK8 +// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK8 +// RUN: %clang_cc1 -DCK8 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK8 +// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK8 +#ifdef CK8 + +// CK8-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: OMP_MAP_BYCOPY = 128 +// CK8-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +// CK8-LABEL: implicit_maps_float +void implicit_maps_float (int a){ + float f = (float)a; + + // CK8-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK8-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK8-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK8-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK8-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK8-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK8-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK8-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK8-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK8-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK8-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to float* + // CK8-DAG: store float {{.+}}, float* [[CADDR]], + + // CK8: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + #pragma omp target + { + f += 1.0; + } +} + +// CK8: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK8: [[ADDR:%.+]] = alloca i[[sz]], +// CK8: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK8: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to float* +// CK8: {{.+}} = load float, float* [[CADDR]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK9 +// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK9 +// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK9 +// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK9 +#ifdef CK9 + +// CK9-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16] +// Map types: OMP_MAP_TO + OMP_MAP_FROM = 2 + 1 +// CK9-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 3] + +// CK9-LABEL: implicit_maps_array +void implicit_maps_array (int a){ + double darr[2] = {(double)a, (double)a}; + + // CK9-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK9-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK9-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK9-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK9-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK9-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK9-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK9-DAG: [[VALBP]] = bitcast [2 x double]* [[DECL:%.+]] to i8* + // CK9-DAG: [[VALP]] = bitcast [2 x double]* [[DECL]] to i8* + + // CK9: call void [[KERNEL:@.+]]([2 x double]* [[DECL]]) + #pragma omp target + { + darr[0] += 1.0; + darr[1] += 1.0; + } +} + +// CK9: define internal void [[KERNEL]]([2 x double]* {{.+}}[[ARG:%.+]]) +// CK9: [[ADDR:%.+]] = alloca [2 x double]*, +// CK9: store [2 x double]* [[ARG]], [2 x double]** [[ADDR]], +// CK9: [[REF:%.+]] = load [2 x double]*, [2 x double]** [[ADDR]], +// CK9: {{.+}} = getelementptr inbounds [2 x double], [2 x double]* [[REF]], i[[sz]] 0, i[[sz]] 0 +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK10 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10 +// RUN: %clang_cc1 -DCK10 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK10 +// RUN: %clang_cc1 -DCK10 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10 +// RUN: %clang_cc1 -DCK10 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK10 +#ifdef CK10 + +// CK10-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}] +// Map types: OMP_MAP_BYCOPY | OMP_MAP_PTR = 128 + 32 +// CK10-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 160] + +// CK10-LABEL: implicit_maps_pointer +void implicit_maps_pointer (){ + double *ddyn; + + // CK10-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK10-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK10-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK10-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK10-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK10-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK10-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK10-DAG: [[VALBP]] = bitcast double* [[PTR:%.+]] to i8* + // CK10-DAG: [[VALP]] = bitcast double* [[PTR]] to i8* + + // CK10: call void [[KERNEL:@.+]](double* [[PTR]]) + #pragma omp target + { + ddyn[0] += 1.0; + ddyn[1] += 1.0; + } +} + +// CK10: define internal void [[KERNEL]](double* {{.*}}[[ARG:%.+]]) +// CK10: [[ADDR:%.+]] = alloca double*, +// CK10: store double* [[ARG]], double** [[ADDR]], +// CK10: [[REF:%.+]] = load double*, double** [[ADDR]], +// CK10: {{.+}} = getelementptr inbounds double, double* [[REF]], i[[sz]] 0 + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK11 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK11 +// RUN: %clang_cc1 -DCK11 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK11 +// RUN: %clang_cc1 -DCK11 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK11 +// RUN: %clang_cc1 -DCK11 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK11 +#ifdef CK11 + +// CK11-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16] +// Map types: OMP_MAP_TO = 1 +// CK11-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 1] + +// CK11-LABEL: implicit_maps_double_complex +void implicit_maps_double_complex (int a){ + double _Complex dc = (double)a; + + // CK11-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK11-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK11-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK11-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK11-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK11-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK11-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK11-DAG: [[VALBP]] = bitcast { double, double }* [[PTR:%.+]] to i8* + // CK11-DAG: [[VALP]] = bitcast { double, double }* [[PTR]] to i8* + + // CK11: call void [[KERNEL:@.+]]({ double, double }* [[PTR]]) + #pragma omp target + { + dc *= dc; + } +} + +// CK11: define internal void [[KERNEL]]({ double, double }* {{.*}}[[ARG:%.+]]) +// CK11: [[ADDR:%.+]] = alloca { double, double }*, +// CK11: store { double, double }* [[ARG]], { double, double }** [[ADDR]], +// CK11: [[REF:%.+]] = load { double, double }*, { double, double }** [[ADDR]], +// CK11: {{.+}} = getelementptr inbounds { double, double }, { double, double }* [[REF]], i32 0, i32 0 +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK12 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-64 +// RUN: %clang_cc1 -DCK12 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-64 +// RUN: %clang_cc1 -DCK12 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-32 +// RUN: %clang_cc1 -DCK12 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-32 +#ifdef CK12 + +// For a 32-bit targets, the value doesn't fit the size of the pointer, +// therefore it is passed by reference with a map 'to' specification. + +// CK12-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8] +// Map types: OMP_MAP_BYCOPY = 128 +// CK12-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] +// Map types: OMP_MAP_TO = 1 +// CK12-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 1] + +// CK12-LABEL: implicit_maps_float_complex +void implicit_maps_float_complex (int a){ + float _Complex fc = (float)a; + + // CK12-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK12-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK12-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK12-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + + // CK12-64-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK12-64-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK12-64-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK12-64-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK12-64-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK12-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to { float, float }* + // CK12-64-DAG: store { float, float } {{.+}}, { float, float }* [[CADDR]], + + // CK12-32-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK12-32-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK12-32-DAG: [[VALBP]] = bitcast { float, float }* [[DECL:%.+]] to i8* + // CK12-32-DAG: [[VALP]] = bitcast { float, float }* [[DECL]] to i8* + + // CK12-64: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + // CK12-32: call void [[KERNEL:@.+]]({ float, float }* [[DECL]]) + #pragma omp target + { + fc *= fc; + } +} + +// CK12-64: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK12-64: [[ADDR:%.+]] = alloca i[[sz]], +// CK12-64: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK12-64: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to { float, float }* +// CK12-64: {{.+}} = getelementptr inbounds { float, float }, { float, float }* [[CADDR]], i32 0, i32 0 + +// CK12-32: define internal void [[KERNEL]]({ float, float }* {{.+}}[[ARG:%.+]]) +// CK12-32: [[ADDR:%.+]] = alloca { float, float }*, +// CK12-32: store { float, float }* [[ARG]], { float, float }** [[ADDR]], +// CK12-32: [[REF:%.+]] = load { float, float }*, { float, float }** [[ADDR]], +// CK12-32: {{.+}} = getelementptr inbounds { float, float }, { float, float }* [[REF]], i32 0, i32 0 +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK13 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK13 +// RUN: %clang_cc1 -DCK13 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK13 +// RUN: %clang_cc1 -DCK13 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK13 +// RUN: %clang_cc1 -DCK13 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK13 +#ifdef CK13 + +// We don't have a constant map size for VLAs. +// Map types: +// - OMP_MAP_BYCOPY = 128 (vla size) +// - OMP_MAP_BYCOPY = 128 (vla size) +// - OMP_MAP_TO + OMP_MAP_FROM = 2 + 1 +// CK13-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i32] [i32 128, i32 128, i32 3] + +// CK13-LABEL: implicit_maps_variable_length_array +void implicit_maps_variable_length_array (int a){ + double vla[2][a]; + + // CK13-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i[[sz:64|32]]* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}}) + // CK13-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK13-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK13-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SS:%[^,]+]], i32 0, i32 0 + + // CK13-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK13-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK13-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 0 + // CK13-DAG: store i8* inttoptr (i[[sz]] 2 to i8*), i8** [[BP0]], + // CK13-DAG: store i8* inttoptr (i[[sz]] 2 to i8*), i8** [[P0]], + // CK13-DAG: store i[[sz]] {{8|4}}, i[[sz]]* [[S0]], + + // CK13-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 + // CK13-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 + // CK13-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 1 + // CK13-DAG: store i8* [[VALBP1:%.+]], i8** [[BP1]], + // CK13-DAG: store i8* [[VALP1:%.+]], i8** [[P1]], + // CK13-DAG: [[VALBP1]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK13-DAG: [[VALP1]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK13-DAG: store i[[sz]] {{8|4}}, i[[sz]]* [[S1]], + + // CK13-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2 + // CK13-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2 + // CK13-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 2 + // CK13-DAG: store i8* [[VALBP2:%.+]], i8** [[BP2]], + // CK13-DAG: store i8* [[VALP2:%.+]], i8** [[P2]], + // CK13-DAG: store i[[sz]] [[VALS2:%.+]], i[[sz]]* [[S2]], + // CK13-DAG: [[VALBP2]] = bitcast double* [[DECL:%.+]] to i8* + // CK13-DAG: [[VALP2]] = bitcast double* [[DECL]] to i8* + // CK13-DAG: [[VALS2]] = mul nuw i[[sz]] %{{.+}}, 8 + + // CK13: call void [[KERNEL:@.+]](i[[sz]] {{.+}}, i[[sz]] {{.+}}, double* [[DECL]]) + #pragma omp target + { + vla[1][3] += 1.0; + } +} + +// CK13: define internal void [[KERNEL]](i[[sz]] [[VLA0:%.+]], i[[sz]] [[VLA1:%.+]], double* {{.+}}[[ARG:%.+]]) +// CK13: [[ADDR0:%.+]] = alloca i[[sz]], +// CK13: [[ADDR1:%.+]] = alloca i[[sz]], +// CK13: [[ADDR2:%.+]] = alloca double*, +// CK13: store i[[sz]] [[VLA0]], i[[sz]]* [[ADDR0]], +// CK13: store i[[sz]] [[VLA1]], i[[sz]]* [[ADDR1]], +// CK13: store double* [[ARG]], double** [[ADDR2]], +// CK13: {{.+}} = load i[[sz]], i[[sz]]* [[ADDR0]], +// CK13: {{.+}} = load i[[sz]], i[[sz]]* [[ADDR1]], +// CK13: [[REF:%.+]] = load double*, double** [[ADDR2]], +// CK13: {{.+}} = getelementptr inbounds double, double* [[REF]], i[[sz]] %{{.+}} +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK14 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-64 +// RUN: %clang_cc1 -DCK14 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-64 +// RUN: %clang_cc1 -DCK14 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-32 +// RUN: %clang_cc1 -DCK14 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-32 +#ifdef CK14 + +// CK14-DAG: [[ST:%.+]] = type { i32, double } +// CK14-DAG: [[SIZES:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}, i{{64|32}} 4] +// Map types: +// - OMP_MAP_TO | OMP_MAP_FROM = 1 + 2 +// - OMP_MAP_BYCOPY = 128 +// CK14-DAG: [[TYPES:@.+]] = {{.+}}constant [2 x i32] [i32 3, i32 128] + +class SSS { +public: + int a; + double b; + + void foo(int c) { + #pragma omp target + { + a += c; + b += (double)c; + } + } + + SSS(int a, double b) : a(a), b(b) {} +}; + +// CK14-LABEL: implicit_maps_class +void implicit_maps_class (int a){ + SSS sss(a, (double)a); + + // CK14: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}}) + // CK14-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK14-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK14-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + + // CK14-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK14-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK14-DAG: store i8* [[VALBP0:%.+]], i8** [[BP0]], + // CK14-DAG: store i8* [[VALP0:%.+]], i8** [[P0]], + // CK14-DAG: [[VALBP0]] = bitcast [[ST]]* [[DECL:%.+]] to i8* + // CK14-DAG: [[VALP0]] = bitcast [[ST]]* [[DECL]] to i8* + + // CK14-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 + // CK14-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 + // CK14-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK14-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK14-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK14-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK14-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK14-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK14-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK14: call void [[KERNEL:@.+]]([[ST]]* [[DECL]], i[[sz]] {{.+}}) + sss.foo(123); +} + +// CK14: define internal void [[KERNEL]]([[ST]]* [[THIS:%.+]], i[[sz]] [[ARG:%.+]]) +// CK14: [[ADDR0:%.+]] = alloca [[ST]]*, +// CK14: [[ADDR1:%.+]] = alloca i[[sz]], +// CK14: store [[ST]]* [[THIS]], [[ST]]** [[ADDR0]], +// CK14: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR1]], +// CK14: [[REF0:%.+]] = load [[ST]]*, [[ST]]** [[ADDR0]], +// CK14-64: [[CADDR1:%.+]] = bitcast i[[sz]]* [[ADDR1]] to i32* +// CK14-64: {{.+}} = load i32, i32* [[CADDR1]], +// CK14-32: {{.+}} = load i32, i32* [[ADDR1]], +// CK14: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF0]], i32 0, i32 0 + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK15 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-64 +// RUN: %clang_cc1 -DCK15 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-64 +// RUN: %clang_cc1 -DCK15 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-32 +// RUN: %clang_cc1 -DCK15 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-32 +#ifdef CK15 + +// CK15: [[ST:%.+]] = type { i32, double, i32* } +// CK15: [[SIZES:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{24|16}}, i{{64|32}} 4] +// Map types: +// - OMP_MAP_TO | OMP_MAP_FROM = 1 + 2 +// - OMP_MAP_BYCOPY = 128 +// CK15: [[TYPES:@.+]] = {{.+}}constant [2 x i32] [i32 3, i32 128] + +// CK15: [[SIZES2:@.+]] = {{.+}}constant [2 x i[[sz]]] [i{{64|32}} {{24|16}}, i{{64|32}} 4] +// Map types: +// - OMP_MAP_TO | OMP_MAP_FROM = 1 + 2 +// - OMP_MAP_BYCOPY = 128 +// CK15: [[TYPES2:@.+]] = {{.+}}constant [2 x i32] [i32 3, i32 128] + +template +class SSST { +public: + int a; + double b; + int &r; + + void foo(int c) { + #pragma omp target + { + a += c + x; + b += (double)(c + x); + r += x; + } + } + template + void bar(int c) { + #pragma omp target + { + a += c + x + y; + b += (double)(c + x + y); + r += x + y; + } + } + + SSST(int a, double b, int &r) : a(a), b(b), r(r) {} +}; + +// CK15-LABEL: implicit_maps_templated_class +void implicit_maps_templated_class (int a){ + SSST<123> ssst(a, (double)a, a); + + // CK15: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}}) + // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + + // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK15-DAG: store i8* [[VALBP0:%.+]], i8** [[BP0]], + // CK15-DAG: store i8* [[VALP0:%.+]], i8** [[P0]], + // CK15-DAG: [[VALBP0]] = bitcast [[ST]]* [[DECL:%.+]] to i8* + // CK15-DAG: [[VALP0]] = bitcast [[ST]]* [[DECL]] to i8* + + // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 + // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 + // CK15-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK15-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK15-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK15-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK15: call void [[KERNEL:@.+]]([[ST]]* [[DECL]], i[[sz]] {{.+}}) + ssst.foo(456); + + // CK15: define {{.*}}void @{{.+}}bar{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}}) + // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}}) + // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + + // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK15-DAG: store i8* [[VALBP0:%.+]], i8** [[BP0]], + // CK15-DAG: store i8* [[VALP0:%.+]], i8** [[P0]], + // CK15-DAG: [[VALBP0]] = bitcast [[ST]]* [[DECL:%.+]] to i8* + // CK15-DAG: [[VALP0]] = bitcast [[ST]]* [[DECL]] to i8* + + // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 + // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 + // CK15-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK15-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK15-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK15-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK15: call void [[KERNEL2:@.+]]([[ST]]* [[DECL]], i[[sz]] {{.+}}) + ssst.bar<210>(789); +} + +// CK15: define internal void [[KERNEL]]([[ST]]* [[THIS:%.+]], i[[sz]] [[ARG:%.+]]) +// CK15: [[ADDR0:%.+]] = alloca [[ST]]*, +// CK15: [[ADDR1:%.+]] = alloca i[[sz]], +// CK15: store [[ST]]* [[THIS]], [[ST]]** [[ADDR0]], +// CK15: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR1]], +// CK15: [[REF0:%.+]] = load [[ST]]*, [[ST]]** [[ADDR0]], +// CK15-64: [[CADDR1:%.+]] = bitcast i[[sz]]* [[ADDR1]] to i32* +// CK15-64: {{.+}} = load i32, i32* [[CADDR1]], +// CK15-32: {{.+}} = load i32, i32* [[ADDR1]], +// CK15: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF0]], i32 0, i32 0 + +// CK15: define internal void [[KERNEL2]]([[ST]]* [[THIS:%.+]], i[[sz]] [[ARG:%.+]]) +// CK15: [[ADDR0:%.+]] = alloca [[ST]]*, +// CK15: [[ADDR1:%.+]] = alloca i[[sz]], +// CK15: store [[ST]]* [[THIS]], [[ST]]** [[ADDR0]], +// CK15: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR1]], +// CK15: [[REF0:%.+]] = load [[ST]]*, [[ST]]** [[ADDR0]], +// CK15-64: [[CADDR1:%.+]] = bitcast i[[sz]]* [[ADDR1]] to i32* +// CK15-64: {{.+}} = load i32, i32* [[CADDR1]], +// CK15-32: {{.+}} = load i32, i32* [[ADDR1]], +// CK15: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF0]], i32 0, i32 0 + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK16 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-64 +// RUN: %clang_cc1 -DCK16 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-64 +// RUN: %clang_cc1 -DCK16 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-32 +// RUN: %clang_cc1 -DCK16 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-32 +#ifdef CK16 + +// CK16-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: +// - OMP_MAP_BYCOPY = 128 +// CK16-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +template +int foo(int d) { + int res = d; + #pragma omp target + { + res += y; + } + return res; +} +// CK16-LABEL: implicit_maps_templated_function +void implicit_maps_templated_function (int a){ + int i = a; + + // CK16: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}}) + // CK16-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK16-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK16-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + + // CK16-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK16-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK16-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK16-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK16-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK16-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK16-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK16-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK16-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK16: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + i = foo<543>(i); +} +// CK16: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK16: [[ADDR:%.+]] = alloca i[[sz]], +// CK16: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK16-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK16-64: {{.+}} = load i32, i32* [[CADDR]], +// CK16-32: {{.+}} = load i32, i32* [[ADDR]], + +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK17 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK17 +// RUN: %clang_cc1 -DCK17 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK17 +// RUN: %clang_cc1 -DCK17 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK17 +// RUN: %clang_cc1 -DCK17 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK17 +#ifdef CK17 + +// CK17-DAG: [[ST:%.+]] = type { i32, double } +// CK17-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}] +// Map types: OMP_MAP_TO + OMP_MAP_FROM = 2 + 1 +// CK17-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 3] + +class SSS { +public: + int a; + double b; +}; + +// CK17-LABEL: implicit_maps_struct +void implicit_maps_struct (int a){ + SSS s = {a, (double)a}; + + // CK17-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK17-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK17-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + // CK17-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK17-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK17-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK17-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK17-DAG: [[VALBP]] = bitcast [[ST]]* [[DECL:%.+]] to i8* + // CK17-DAG: [[VALP]] = bitcast [[ST]]* [[DECL]] to i8* + + // CK17: call void [[KERNEL:@.+]]([[ST]]* [[DECL]]) + #pragma omp target + { + s.a += 1; + s.b += 1.0; + } +} + +// CK17: define internal void [[KERNEL]]([[ST]]* {{.+}}[[ARG:%.+]]) +// CK17: [[ADDR:%.+]] = alloca [[ST]]*, +// CK17: store [[ST]]* [[ARG]], [[ST]]** [[ADDR]], +// CK17: [[REF:%.+]] = load [[ST]]*, [[ST]]** [[ADDR]], +// CK17: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF]], i32 0, i32 0 +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK18 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64 +// RUN: %clang_cc1 -DCK18 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64 +// RUN: %clang_cc1 -DCK18 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-32 +// RUN: %clang_cc1 -DCK18 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-32 +#ifdef CK18 + +// CK18-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// Map types: +// - OMP_MAP_BYCOPY = 128 +// CK18-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 128] + +template +int foo(T d) { + #pragma omp target + { + d += (T)1; + } + return d; +} +// CK18-LABEL: implicit_maps_template_type_capture +void implicit_maps_template_type_capture (int a){ + int i = a; + + // CK18: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}}) + // CK18-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK18-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 + // CK18-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 + + // CK18-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 + // CK18-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 + // CK18-DAG: store i8* [[VALBP:%.+]], i8** [[BP1]], + // CK18-DAG: store i8* [[VALP:%.+]], i8** [[P1]], + // CK18-DAG: [[VALBP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK18-DAG: [[VALP]] = inttoptr i[[sz]] [[VAL:%.+]] to i8* + // CK18-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], + // CK18-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* + // CK18-64-DAG: store i32 {{.+}}, i32* [[CADDR]], + + // CK18: call void [[KERNEL:@.+]](i[[sz]] [[VAL]]) + i = foo(i); +} +// CK18: define internal void [[KERNEL]](i[[sz]] [[ARG:%.+]]) +// CK18: [[ADDR:%.+]] = alloca i[[sz]], +// CK18: store i[[sz]] [[ARG]], i[[sz]]* [[ADDR]], +// CK18-64: [[CADDR:%.+]] = bitcast i64* [[ADDR]] to i32* +// CK18-64: {{.+}} = load i32, i32* [[CADDR]], +// CK18-32: {{.+}} = load i32, i32* [[ADDR]], + +#endif +#endif