diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -795,14 +795,8 @@ if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && FD && FD->getType()->isReferenceType() && (!VD || !VD->hasAttr())) { - if (OMPBuilder) - // TODO: Move and modify this function based on target regions after - // they land - (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, - OrigVD); - else - (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, - OrigVD); + (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, + OrigVD); ++IRef; ++InitsRef; continue; @@ -886,9 +880,11 @@ Lastprivates[OrigVD->getCanonicalDecl()] == OMPC_LASTPRIVATE_conditional) { // Create/init special variable for lastprivate conditionals. - Address VDAddr = + Address VDAddr = Address::invalid(); + VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( *this, OrigVD); + llvm::Value *V = EmitLoadOfScalar( MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(), AlignmentSource::Decl), @@ -1438,6 +1434,138 @@ const OMPExecutableDirective &, llvm::SmallVectorImpl &) {} +static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, + QualType FieldTy) { + auto *Field = FieldDecl::Create( + C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, + C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + DC->addDecl(Field); + return Field; +} + +Address CodeGenFunction::OMPBuilderCBHelpers::emitLastprivateConditionalInit( + CodeGenFunction &CGF, const VarDecl *VD) { + ASTContext &C = CGF.CGM.getContext(); + auto I = CGF.LastprivateConditionalToTypes.find(CGF.CurFn); + if (I == CGF.LastprivateConditionalToTypes.end()) + I = CGF.LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; + QualType NewType; + const FieldDecl 
*VDField; + const FieldDecl *FiredField; + LValue BaseLVal; + auto VI = I->getSecond().find(VD); + if (VI == I->getSecond().end()) { + RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); + RD->startDefinition(); + VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); + FiredField = addFieldToRecordDecl(C, RD, C.CharTy); + RD->completeDefinition(); + NewType = C.getRecordType(RD); + Address Addr = + CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); + BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); + I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); + } else { + NewType = std::get<0>(VI->getSecond()); + VDField = std::get<1>(VI->getSecond()); + FiredField = std::get<2>(VI->getSecond()); + BaseLVal = std::get<3>(VI->getSecond()); + } + LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), + FiredLVal); + return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); +} + +bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPCopyinClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + InsertPointTy AllocaIP) { + if (!CGF.HaveInsertPoint()) + return false; + // threadprivate_var1 = master_threadprivate_var1; + // operator=(threadprivate_var2, master_threadprivate_var2); + // ... 
+ // __kmpc_barrier(&loc, global_tid); + llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); + llvm::DenseSet CopiedVars; + llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; + for (const auto *C : D.template getClausesOfKind()) { + auto IRef = C->varlist_begin(); + auto ISrcRef = C->source_exprs().begin(); + auto IDestRef = C->destination_exprs().begin(); + for (const Expr *AssignOp : C->assignment_ops()) { + const auto *VD = cast(cast(*IRef)->getDecl()); + QualType Type = VD->getType(); + if (CopiedVars.insert(VD->getCanonicalDecl()).second) { + // Get the address of the master variable. If we are emitting code with + // TLS support, the address is passed from the master as field in the + // captured declaration. + Address MasterAddr = Address::invalid(); + if (CGF.getLangOpts().OpenMPUseTLS && + CGF.getContext().getTargetInfo().isTLSSupported()) { + assert(CGF.CapturedStmtInfo->lookup(VD) && + "Copyin threadprivates should have been captured!"); + const auto *VDCanon = VD->getCanonicalDecl(); + auto I = CGF.LocalDeclMap.find(VDCanon); + if (I == CGF.LocalDeclMap.end()) { + Address Addr(CGF.CGM.GetAddrOfGlobal(VDCanon), + CGF.getContext().getDeclAlign(VDCanon)); + CGF.LocalDeclMap.try_emplace(VDCanon, Addr); + } + DeclRefExpr DRE(CGF.getContext(), const_cast(VD), true, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + MasterAddr = CGF.EmitLValue(&DRE).getAddress(CGF); + CGF.LocalDeclMap.erase(VDCanon); + } else { + MasterAddr = Address(VD->isStaticLocal() + ? CGF.CGM.getStaticLocalDeclAddress(VD) + : CGF.CGM.GetAddrOfGlobal(VD), + CGF.getContext().getDeclAlign(VD)); + } + + // Get the address of the threadprivate variable. + Address PrivateAddr = CGF.EmitLValue(*IRef).getAddress(CGF); + if (CopiedVars.size() == 1) { + // At first check if current thread is a master thread. If it is, no + // need to copy data. 
+ InsertPointTy CopyBeginIP = OMPBuilder->CreateCopyinClauseBlocks( + AllocaIP, MasterAddr.getPointer(), PrivateAddr.getPointer(), + CGF.IntPtrTy, /*BranchtoEnd=*/false); + CGF.Builder.restoreIP(CopyBeginIP); + CopyBegin = CopyBeginIP.getBlock(); + assert(CopyBegin && "CopyIn Basic Block was not generated!"); + llvm::BranchInst *EntryCBI = llvm::dyn_cast_or_null( + AllocaIP.getBlock()->getTerminator()); + CopyEnd = EntryCBI ? EntryCBI->getSuccessor(1) : nullptr; + assert(CopyEnd && "No unique successor for CopyIn Basic Block!"); + } + const auto *SrcVD = + cast(cast(*ISrcRef)->getDecl()); + const auto *DestVD = + cast(cast(*IDestRef)->getDecl()); + CGF.EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); + } + ++IRef; + ++ISrcRef; + ++IDestRef; + } + } + if (CopyEnd) { + // Exit out of copying procedure for non-master thread. + CGF.EmitBranch(CopyEnd); + if (llvm::Instruction *CopyEndTI = + CopyEnd->getTerminator()) + CGF.Builder.SetInsertPoint(CopyEndTI); + else + CGF.Builder.SetInsertPoint(CopyEnd); + return true; + } + return false; +} + Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( CodeGenFunction &CGF, const VarDecl *VD) { CodeGenModule &CGM = CGF.CGM; @@ -1528,6 +1656,293 @@ } return OS.str().str(); } + +bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPFirstprivateClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + OMPPrivateScope &PrivateScope, + llvm::SmallDenseMap + &CapturedVarsInfoMap) { + if (!CGF.HaveInsertPoint()) + return false; + + CodeGenModule &CGM = CGF.CGM; + bool DeviceConstTarget = + CGF.getLangOpts().OpenMPIsDevice && + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); + bool FirstprivateIsLastprivate = false; + llvm::DenseMap Lastprivates; + for (const auto *C : D.getClausesOfKind()) { + for (const auto *D : C->varlists()) + Lastprivates.try_emplace( + cast(cast(D)->getDecl())->getCanonicalDecl(), + C->getKind()); + } + llvm::DenseSet EmittedAsFirstprivate;
+ llvm::SmallVector CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); + // Force emission of the firstprivate copy if the directive does not emit + // outlined function, like omp for, omp simd, omp distribute etc. + bool MustEmitFirstprivateCopy = + CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; + for (const auto *C : D.getClausesOfKind()) { + const auto *IRef = C->varlist_begin(); + const auto *InitsRef = C->inits().begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast(cast(*IRef)->getDecl()); + bool ThisFirstprivateIsLastprivate = + Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; + const FieldDecl *FD = CGF.CapturedStmtInfo->lookup(OrigVD); + const auto *VD = cast(cast(IInit)->getDecl()); + if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && + !FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr())) { + if (CapturedVarsInfoMap[OrigVD].CapturedKind != + CapturedVarInfo::ByValue || + OrigVD->isConstexpr()) { + EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); + ++IRef; + ++InitsRef; + continue; + } + } + // Do not emit copy for firstprivate constant variables in target regions, + // captured by reference. 
+ if (DeviceConstTarget && OrigVD->getType().isConstant(CGF.getContext()) && + FD && FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr())) { + // TODO: Move and modify this function based on target regions after + // they land + (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, + OrigVD); + ++IRef; + ++InitsRef; + continue; + } + FirstprivateIsLastprivate = + FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; + if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { + const auto *VDInit = + cast(cast(*InitsRef)->getDecl()); + bool IsRegistered; + DeclRefExpr DRE(CGF.getContext(), const_cast(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + LValue OriginalLVal; + if (!FD) { + // Check if the firstprivate variable is just a constant value. + ConstantEmission CE = CGF.tryEmitAsConstant(&DRE); + if (CE && !CE.isReference()) { + // Constant value, no need to create a copy. + ++IRef; + ++InitsRef; + continue; + } + if (CE && CE.isReference()) { + OriginalLVal = CE.getReferenceLValue(CGF, &DRE); + } else { + assert(!CE && "Expected non-constant firstprivate."); + OriginalLVal = CGF.EmitLValue(&DRE); + } + } else { + OriginalLVal = CGF.EmitLValue(&DRE); + } + QualType Type = VD->getType(); + if (Type->isArrayType()) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + IsRegistered = PrivateScope.addPrivate( + OrigVD, [&CGF, VD, Type, OriginalLVal, VDInit]() { + AutoVarEmission Emission = CGF.EmitAutoVarAlloca(*VD); + const Expr *Init = VD->getInit(); + if (!isa(Init) || + CGF.isTrivialInitializer(Init)) { + // Perform simple memcpy. 
+ LValue Dest = + CGF.MakeAddrLValue(Emission.getAllocatedAddress(), Type); + CGF.EmitAggregateAssign(Dest, OriginalLVal, Type); + } else { + CGF.EmitOMPAggregateAssign( + Emission.getAllocatedAddress(), + OriginalLVal.getAddress(CGF), Type, + [&CGF, VDInit, Init](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the + // initialization. + RunCleanupsScope InitScope(CGF); + // Emit initialization for single element. + CGF.setAddrOfLocalVar(VDInit, SrcElement); + CGF.EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer*/ false); + CGF.LocalDeclMap.erase(VDInit); + }); + } + CGF.EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); + } else { + Address OriginalAddr = OriginalLVal.getAddress(CGF); + IsRegistered = PrivateScope.addPrivate( + OrigVD, [&CGF, VDInit, OriginalAddr, VD, + ThisFirstprivateIsLastprivate, OrigVD, &Lastprivates, + IRef, &IInit, &CGM, &CapturedVarsInfoMap, &FD]() { + Address VarAddr = OriginalAddr; + ASTContext &Ctx = CGF.getContext(); + QualType UIntPtrTy = Ctx.getUIntPtrType(); + llvm::Value *V = CapturedVarsInfoMap[OrigVD].PassedValue; + if (CapturedVarsInfoMap[OrigVD].CapturedKind == + CapturedVarInfo::ByValue) { + CGF.setAddrOfLocalVar(VDInit, OriginalAddr); + CharUnits AddrAlign = Ctx.getDeclAlign(&*VD); + Address DeclPtr = CGF.CreateMemTemp(UIntPtrTy, AddrAlign, + VD->getName() + ".addr"); + LValue DstLV = CGF.MakeAddrLValue(DeclPtr, UIntPtrTy, + AlignmentSource::Decl); + CGF.EmitStoreOfScalar(V, DstLV); + if (!OrigVD->getType()->isPointerType()) { + VarAddr = DeclPtr; + if (VD->getType() != UIntPtrTy) + VarAddr = castValueFromUintptr( + CGF, (*IRef)->getExprLoc(), VD->getType(), + VD->getName(), + CGF.MakeAddrLValue(DeclPtr, UIntPtrTy)); + Address OMPAddress = getAddressOfLocalVariable(CGF, VD); + if (OMPAddress.isValid()) { + LValue VarAddrLV = CGF.MakeAddrLValue( + VarAddr, VD->getType(), AlignmentSource::Decl); + llvm::Value *CV = + 
CGF.EmitLoadOfScalar(VarAddrLV, IInit->getBeginLoc()); + CGF.EmitStoreOfScalar( + CV, CGF.MakeAddrLValue(OMPAddress, VD->getType(), + AlignmentSource::Decl)); + VarAddr = OMPAddress; + } + CGF.setAddrOfLocalVar(VD, VarAddr); + } else { + llvm_unreachable("Unhandled Captured by Value VarDecl!"); + } + } else { + QualType VDPtrTy = Ctx.getPointerType(VD->getType()); + CharUnits AddrAlign = Ctx.getDeclAlign(&*VD); + Address DeclPtr = CGF.CreateMemTemp(VDPtrTy, AddrAlign, + VD->getName() + ".addr"); + LValue DstLV = CGF.MakeAddrLValue(DeclPtr, VDPtrTy, + AlignmentSource::Decl); + CGF.EmitStoreOfScalar(V, DstLV); + llvm::Value *PtrLd = + CGF.EmitLoadOfScalar(DstLV, IInit->getBeginLoc()); + VarAddr = Address(PtrLd, AddrAlign); + if (VD->getType() != VDPtrTy) + VarAddr = castValueFromUintptr( + CGF, (*IRef)->getExprLoc(), VD->getType(), + VD->getName(), CGF.MakeAddrLValue(VarAddr, VDPtrTy)); + CGF.setAddrOfLocalVar(VD, VarAddr); + CGF.setAddrOfLocalVar(VDInit, VarAddr); + const auto *cleanups = + dyn_cast(VD->getInit()); + const Expr *Init = + (cleanups) ? cleanups->getSubExpr() : VD->getInit(); + if (isa(Init) && + !CGF.isTrivialInitializer(Init)) { + // The scope must outlive the copy so it covers the initializer's temporaries. + if (cleanups) + CGF.enterFullExpression(cleanups); + RunCleanupsScope InitScope(CGF); + Address DstPtr = CGF.CreateMemTemp(VD->getType(), AddrAlign, + VD->getName()); + CGF.EmitAnyExprToMem(Init, DstPtr, + Init->getType().getQualifiers(), + /*IsInitializer=*/false); + CGF.LocalDeclMap.erase(VD); + CGF.setAddrOfLocalVar(VD, DstPtr); + // TODO: Emit cleanup info for the private copy now registered for VD. + } + } + + CGF.LocalDeclMap.erase(VDInit); + if (ThisFirstprivateIsLastprivate && + Lastprivates[OrigVD->getCanonicalDecl()] == + OMPC_LASTPRIVATE_conditional) { + // Create/init special variable for lastprivate conditionals.
+ Address VDAddr = + emitLastprivateConditionalInit(CGF, OrigVD); + llvm::Value *V = CGF.EmitLoadOfScalar( + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), + (*IRef)->getType(), + AlignmentSource::Decl), + (*IRef)->getExprLoc()); + CGF.EmitStoreOfScalar( + V, CGF.MakeAddrLValue(VDAddr, (*IRef)->getType(), + AlignmentSource::Decl)); + CGF.LocalDeclMap.erase(VD); + CGF.setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } + return CGF.GetAddrOfLocalVar(VD); + }); + } + assert(IsRegistered && + "firstprivate var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } + ++IRef; + ++InitsRef; + } + } + return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); +} + +void CodeGenFunction::OMPBuilderCBHelpers::GenerateOpenMPCapturedVars( + CodeGenFunction &CGF, const CapturedStmt &S, + llvm::SmallDenseMap + &CapturedVarsValueMap) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + auto CurField = RD->field_begin(); + auto CurCap = S.captures().begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField, ++CurCap) { + if (CurField->hasCapturedVLAType() || CurCap->capturesThis()) { + // Nothing is recorded for captured VLA type fields or captures of 'this'. + } else if (CurCap->capturesVariableByCopy()) { + llvm::Value *CV = + CGF.EmitLoadOfScalar(CGF.EmitLValue(*I), CurCap->getLocation()); + + // If the field is not a pointer, we need to save the actual value + // and load it as a void pointer.
+ if (!CurField->getType()->isAnyPointerType()) { + ASTContext &Ctx = CGF.getContext(); + Address DstAddr = CGF.CreateMemTemp( + Ctx.getUIntPtrType(), + Twine(CurCap->getCapturedVar()->getName(), ".casted")); + LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); + + llvm::Value *SrcAddrVal = CGF.EmitScalarConversion( + DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), + Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); + LValue SrcLV = + CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); + + // Store the value using the source type pointer. + CGF.EmitStoreThroughLValue(RValue::get(CV), SrcLV); + + // Load the value using the destination type pointer. + CV = CGF.EmitLoadOfScalar(DstLV, CurCap->getLocation()); + } + CapturedVarsValueMap[CurCap->getCapturedVar()] = { + CV, CapturedVarInfo::ByValue}; + } else { + assert(CurCap->capturesVariable() && "Expected capture by reference."); + CapturedVarsValueMap[CurCap->getCapturedVar()] = { + CGF.EmitLValue(*I).getAddress(CGF).getPointer(), + CapturedVarInfo::ByRef}; + } + } +} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { // Check if we have any if clause associated with the directive. @@ -1561,6 +1976,7 @@ llvm::Value &Val, llvm::Value *&ReplVal) { // The next line is appropriate only for variables (Val) with the // data-sharing attribute "shared". 
+ ReplVal = &Val; return CodeGenIP; @@ -1569,17 +1985,70 @@ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); - auto BodyGenCB = [ParallelRegionBodyStmt, - this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - llvm::BasicBlock &ContinuationBB) { + OMPParallelScope Scope(*this, S); + llvm::SmallDenseMap + CapturedVarsInfoMap; + + OMPBuilderCBHelpers::GenerateOpenMPCapturedVars(*this, *CS, + CapturedVarsInfoMap); + + auto BodyGenCB = [ParallelRegionBodyStmt, &S, this, &OMPBuilder, + &CapturedVarsInfoMap](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &ContinuationBB) { OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, ContinuationBB); + + OMPPrivateScope PrivateScope(*this); + llvm::BasicBlock *OMP_Entry = AllocaIP.getBlock(); + // Emit the 'copyin' clause before the terminator of the entry block. + Builder.SetInsertPoint(OMP_Entry->getTerminator()); + bool Copyins = + OMPBuilderCBHelpers::EmitOMPCopyinClause(*this, S, AllocaIP); + + // Restore the alloca insertion point to the entry block since it moved + // while emitting the 'copyin' blocks. + AllocaInsertPt = OMPBuilderCBHelpers::GetAllocaInsertPoint(OMP_Entry); + llvm::BranchInst *EntryBI = + cast(OMP_Entry->getTerminator()); + EntryBI->removeFromParent(); + + if (Builder.GetInsertBlock() == OMP_Entry) + Builder.SetInsertPoint(OMP_Entry); + OMPBuilderCBHelpers::EmitOMPFirstprivateClause(*this, S, PrivateScope, + CapturedVarsInfoMap); + if (Copyins) { + // Emit an implicit barrier to synchronize threads and avoid data races + // when propagating the master thread's values of threadprivate variables + // to the local instances of those variables in all other implicit threads.
+ OMPBuilder->CreateBarrier(Builder, OMPD_barrier, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + + EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + if (!OMP_Entry->getTerminator()) { + OMP_Entry->getInstList().push_back(EntryBI); + } else if (Builder.GetInsertBlock()->getTerminator()) { + EntryBI->dropAllReferences(); + EntryBI->deleteValue(); + } else { + Builder.Insert(EntryBI); + } + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, CodeGenIP, ContinuationBB); + llvm::Instruction *ContTI = ContinuationBB.getTerminator(); + ContTI->removeFromParent(); + Builder.SetInsertPoint(&ContinuationBB); + PrivateScope.ForceCleanup(); + Builder.Insert(ContTI); }; CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel())); @@ -3261,13 +3730,10 @@ if (const auto *HintClause = S.getSingleClause()) Hint = HintClause->getHint(); - // TODO: This is slightly different from what's currently being done in - // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything - // about typing is final. llvm::Value *HintInst = nullptr; if (Hint) HintInst = - Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); + Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.IntPtrTy, false); auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1555,6 +1555,14 @@ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + struct CapturedVarInfo { + /// How a captured variable is passed: by value or by reference. + enum CapturedVarKind { ByValue, ByRef }; + /// Value recorded for the variable: its loaded value (ByValue) or the pointer to it (ByRef). + llvm::Value *PassedValue; + CapturedVarKind CapturedKind; + }; + /// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { @@ -1596,6 +1604,41 @@ static Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD); + /// Emit the firstprivate clauses of directive \p D. + /// + /// \param CGF CodeGenFunction of the function containing the directive. + /// \param D The directive the firstprivate clauses are associated with. + /// \param PrivateScope Scope in which the private copies are registered. + /// \param CapturedVarsInfoMap Map from each captured variable to the value + /// generated for it and the way it was captured. + /// + /// \return True if at least one firstprivate variable is also lastprivate. + static bool EmitOMPFirstprivateClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + OMPPrivateScope &PrivateScope, + llvm::SmallDenseMap + &CapturedVarsInfoMap); + + /// Emit copies of the master thread's values into the threadprivate + /// variables listed in the copyin clauses of \p D. + /// + /// \return True if any copy was emitted. + static bool EmitOMPCopyinClause(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + InsertPointTy AllocaIP); + + /// Create specialized alloca to handle lastprivate conditionals. + static Address emitLastprivateConditionalInit(CodeGenFunction &CGF, + const VarDecl *VD); + + /// Record in \p CapturedVarsInfoMap the value generated for each variable + /// captured by \p S and whether it is captured by value or by reference. + /// Fields with a captured VLA type and captures of 'this' are skipped. + static void GenerateOpenMPCapturedVars( + CodeGenFunction &CGF, const CapturedStmt &S, + llvm::SmallDenseMap + &CapturedVarsInfoMap); + /// Get the platform-specific name separator. /// \param Parts different parts of the final name that needs separation /// \param FirstSeparator First separator used between the initial two @@ -1711,6 +1754,14 @@ }; private: + /// Maps local variables marked as lastprivate conditional to their internal + /// types. + llvm::DenseMap, + std::tuple>> + LastprivateConditionalToTypes; + /// CXXThisDecl - When generating code for a C++ member function, /// this will hold the implicit 'this' declaration.
ImplicitParamDecl *CXXABIThisDecl = nullptr; diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -1,9 +1,12 @@ -// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,CHECK // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,CHECK // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DLAMBDA -triple x86_64-linux -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -fblocks -DBLOCKS -triple x86_64-linux -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd 
-fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s @@ -46,21 +49,22 @@ ~S() {} }; -// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// ALL-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// ALL-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } // CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// IRBUILDER-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 34, i32 0, i32 0, i8* // TLS-CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // TLS-CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } // TLS-CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122, -// CHECK-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], -// CHECK-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer, -// CHECK-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer, -// CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333, -// CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3], -// CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer, -// CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer, +// ALL-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122, +// ALL-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +// ALL-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer, +// ALL-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer, +// ALL-DAG: 
[[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333, +// ALL-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3], +// ALL-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer, +// ALL-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer, // TLS-CHECK-DAG: [[T_VAR:@.+]] = internal thread_local global i{{[0-9]+}} 1122, // TLS-CHECK-DAG: [[VEC:@.+]] = internal thread_local global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], // TLS-CHECK-DAG: [[S_ARR:@.+]] = internal thread_local global [2 x [[S_FLOAT_TY]]] zeroinitializer, @@ -230,14 +234,14 @@ #endif } -// CHECK-LABEL: @main -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]* -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() -// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* -// CHECK: ret +// ALL-LABEL: @main +// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]* +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) +// ALL: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() +// ALL: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* +// ALL: ret // TLS-CHECK-LABEL: @main // TLS-CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], @@ -248,22 +252,56 @@ // TLS-CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // TLS-CHECK: ret -// CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: define internal {{.*}}void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] + +// threadprivate_t_var = t_var; +// IRBUILDER: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]] +// IRBUILDER: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// IRBUILDER: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}} +// IRBUILDER: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] + +// IRBUILDER: ret void + +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) +// IRBUILDER: add nsw i32 %{{.+}}, 1 + +// IRBUILDER: [[NOT_MASTER]] +// IRBUILDER: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]], +// IRBUILDER: store 
i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, + +// ALL: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] + // TLS-CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, // TLS-CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // threadprivate_t_var = t_var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]] -// CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} -// CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}} -// CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] -// CHECK: [[NOT_MASTER]] -// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]], -// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]] +// ALL: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// ALL: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}} +// ALL: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] + +// IRBUILDER: ret void +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) + +// ALL: [[NOT_MASTER]] +// ALL: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]], +// ALL: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** % // TLS-CHECK: 
[[MASTER_REF2:%.+]] = load [2 x i32]*, [2 x i32]** % @@ -278,21 +316,21 @@ // TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[T_VAR]] // threadprivate_vec = vec; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]] -// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]] +// ALL: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF2]] to i8* // TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]] // threadprivate_s_arr = s_arr; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]] -// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] -// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] -// CHECK: [[S_ARR_BODY]] -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}) -// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]] +// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// ALL: br i1 [[IS_EMPTY]], label 
%[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] +// ALL: [[S_ARR_BODY]] +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}) +// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[MASTER_REF3]] to [[S_FLOAT_TY]]* // TLS-CHECK-DAG: [[S_ARR_SRC_BEGIN:%.+]] = phi [[S_FLOAT_TY]]* {{.*}}[[MASTER_CAST]] @@ -305,8 +343,8 @@ // TLS-CHECK: [[ARR_DONE]] // threadprivate_var = var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]] -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]]) +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]] +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]]) // CHECK: [[DONE]] // TLS-CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.*}}[[VAR]], [[S_FLOAT_TY]]* {{.*}}[[MASTER_REF4]]) @@ -356,36 +394,67 @@ // TLS-CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]]) // TLS-CHECK: ret void -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]* -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* -// CHECK: ret +// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]], +// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]* +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) +// ALL: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// ALL: ret // TLS-CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // TLS-CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // TLS-CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]* // TLS-CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), // TLS-CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*), + +// IRBUILDER: define internal {{.*}}void [[TMAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// threadprivate_t_var = t_var; +// IRBUILDER: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]] +// IRBUILDER: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// IRBUILDER: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} +// IRBUILDER: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] +// IRBUILDER: ret void +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) +// IRBUILDER: [[NOT_MASTER]] +// IRBUILDER: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], +// IRBUILDER: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, + // -// CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// ALL: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], + +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] // // 
TLS-CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // TLS-CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // threadprivate_t_var = t_var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]] -// CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} -// CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} -// CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] -// CHECK: [[NOT_MASTER]] -// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128 -// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]] +// ALL: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// ALL: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} +// ALL: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] + +// IRBUILDER: ret void + +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) + +// ALL: [[NOT_MASTER]] +// ALL: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128 +// ALL: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** % // TLS-CHECK: [[MASTER_REF1:%.+]] = load [2 x i32]*, [2 x i32]** % @@ -400,21 +469,21 @@ // TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[TMAIN_T_VAR]], align 128 // threadprivate_vec = vec; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]] -// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]] +// ALL: call {{.*}}void @llvm.memcpy{{.*}}(i8* align 
{{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF1]] to i8* // TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]] // threadprivate_s_arr = s_arr; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]] -// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] -// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] -// CHECK: [[S_ARR_BODY]] -// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}) -// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]] +// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] +// ALL: [[S_ARR_BODY]] +// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[MASTER_REF2]] to [[S_INT_TY]]* // TLS-CHECK-DAG: [[S_ARR_SRC_BEGIN:%.+]] = phi [[S_INT_TY]]* {{.*}}[[MASTER_CAST]] @@ -427,8 +496,8 @@ // TLS-CHECK: [[ARR_DONE]] // threadprivate_var = var; -// CHECK: call 
{{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]] -// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]]) +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]] +// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]]) // CHECK: [[DONE]] // TLS-CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.*}}[[TMAIN_VAR]], [[S_INT_TY]]* {{.*}}[[MASTER_REF3]]) diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -1,8 +1,11 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-32,CHECK,CHECK-32 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-32,CHECK,CHECK-32 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-32 %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-32 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-32,IRBUILDER,IRBUILDER-32 +// RUN: 
%clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-32,IRBUILDER,IRBUILDER-32 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s @@ -11,11 +14,14 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-64,CHECK,CHECK-64 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-64,CHECK,CHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-64 %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-64 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s 
-check-prefixes=ALL,ALL-64,IRBUILDER,IRBUILDER-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-64,IRBUILDER,IRBUILDER-64 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s @@ -62,6 +68,7 @@ int e[4]; SS(int &d) : a(0), b(0), c(d) { #pragma omp parallel firstprivate(a, b, c, e) + #ifdef LAMBDA [&]() { ++this->a, --b, (this)->c /= 1; @@ -119,12 +126,12 @@ ~S() {} }; -// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 +// ALL: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 -// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } +// ALL-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// ALL-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// ALL-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } template T tmain() { @@ -342,43 +349,73 @@ #endif } -// CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[T_VAR:%.+]] = alloca i32, -// CHECK: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]], -// CHECK: [[SIVARCAST:%.+]] = alloca [[iz]], -// CHECK: [[A:%.+]] = alloca i32, -// CHECK: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], -// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32* -// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], -// CHECK-32: store 
i32 [[T_VARVAL]], i32* [[T_VARCAST]], -// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST]], -// CHECK: [[SIVARVAL:%.+]] = load i32, i32* @{{.+}}, -// CHECK-64: [[SIVARCONV:%.+]] = bitcast i64* [[SIVARCAST]] to i32* -// CHECK-64: store i32 [[SIVARVAL]], i32* [[SIVARCONV]], -// CHECK-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]], -// CHECK: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]], +// ALL: define {{.*}}i{{[0-9]+}} @main() +// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// ALL: [[T_VAR:%.+]] = alloca i32, +// ALL: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]], +// ALL: [[SIVARCAST:%.+]] = alloca [[iz]], +// ALL: [[A:%.+]] = alloca i32, +// ALL: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]], +// ALL: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// ALL: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], +// ALL-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32* +// ALL-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], +// ALL-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST]], +// ALL: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST]], +// ALL: [[SIVARVAL:%.+]] = load i32, i32* @{{.+}}, +// ALL-64: [[SIVARCONV:%.+]] = bitcast i64* [[SIVARCAST]] to i32* +// ALL-64: store i32 [[SIVARVAL]], i32* [[SIVARCONV]], +// ALL-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]], +// ALL: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]], // CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, [[iz]], [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]] -// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], -// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32* -// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], -// CHECK-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]], -// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]], -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]]) -// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() -// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* -// CHECK: ret +// IRBUILDER: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]], [2 x i32]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}{{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]] +// ALL: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], +// ALL-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32* +// ALL-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], +// ALL-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]], +// ALL: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]], +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]]) +// ALL: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() +// ALL: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* +// ALL: ret +// +// IRBUILDER: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]]) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// IRBUILDER: store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]], +// IRBUILDER-64: [[BC:%.+]] = bitcast [[iz]]* [[T_VAR_ADDR]] to i32* +// IRBUILDER: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: [[T_VAR_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], [[iz]] 4, i8* inttoptr ([[iz]] 1 to i8*)) +// IRBUILDER: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: [[T_VAR_PRIV:%.+]] = bitcast i8* [[T_VAR_VOID_PTR]] to i32* +// IRBUILDER-32: [[T_VAR_VAL:%.+]] = load i32, i32* [[T_VAR_ADDR]], +// IRBUILDER-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]], +// IRBUILDER: store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]], +// IRBUILDER: ret void +// IRBUILDER: store i32 0, i32* [[T_VAR_PRIV]], +// IRBUILDER: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*)) + // // CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [[iz]] {{.*}}%{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, [[iz]] {{.*}}[[SIVAR:%.+]]) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// IRBUILDER: define internal {{.*}}void 
[[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] {{.*}}%{{.+}}, [2 x i32]* [[VEC_REF:%.+]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF:%.+]], [[S_FLOAT_TY]]* %{{.+}}, [[iz]] {{.*}}[[SIVAR:%.+]]) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// ALL: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[SIVAR7_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], +// ALL: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// ALL: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// IRBUILDER: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// ALL: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// IRBUILDER: [[SIVAR7_PRIV:%.+]] = alloca i{{[0-9]+}}, +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// IRBUILDER-64: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_PRIV]] to i32* +// IRBUILDER: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// IRBUILDER: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** % // CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK-64: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_PRIV]] to i32* @@ -386,31 +423,35 @@ // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % // CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32* + // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void 
@llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], -// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] -// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] -// CHECK: [[S_ARR_BODY]] -// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] -// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) +// ALL: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], +// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]* +// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] +// IRBUILDER: ret void +// ALL: [[S_ARR_BODY]] +// ALL: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) 
+// ALL: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) +// ALL: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]]) +// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// IRBUILDER: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % +// ALL: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32* // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK-64: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_CONV]], -// CHECK-32: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_PRIV]], +// ALL-64: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_CONV]], +// ALL-32: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_PRIV]], +// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* +// ALL-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* // CHECK: ret void - // CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]]) // CHECK: [[GTID_ADDR:%.+]] = alloca i32*, // CHECK: store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]], @@ -426,13 +467,13 @@ // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*)) // CHECK: ret void - -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]], +// ALL: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) // CHECK: call 
{{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void -// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* -// CHECK: ret +// IRBUILDER: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void +// ALL: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// ALL: ret i{{[0-9]+}} // // CHECK: define {{.+}} @{{.+}}([[SS_TY]]* // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 @@ -446,14 +487,70 @@ // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, [[iz]], [[iz]], [[iz]], [4 x i32]*)* [[SS_MICROTASK:@.+]] to void // CHECK: ret +// IRBUILDER: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* %{{.+}}, [2 x i32]* %{{.+}}, [2 x [[S_INT_TY]]]* %{{.+}}, [[S_INT_TY]]* %{{.+}}) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}*, align 128 +// IRBUILDER: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128 +// IRBUILDER: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128 +// IRBUILDER: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// IRBUILDER: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % +// IRBUILDER: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// IRBUILDER: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* +// IRBUILDER: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i1 +// IRBUILDER: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// IRBUILDER: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]* +// IRBUILDER: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// IRBUILDER: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// IRBUILDER: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] + +// IRBUILDER: ret void + +// IRBUILDER: [[S_ARR_BODY]] +// IRBUILDER: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// 
IRBUILDER: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// IRBUILDER: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % +// IRBUILDER: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// IRBUILDER: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]], +// IRBUILDER: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* +// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER-NOT: call {{.*}}void @__kmpc_barrier( + +// IRBUILDER-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* + +// IRBUILDER: define {{.+}} @{{.+}}([[SS_TY]]* +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 +// IRBUILDER: store i{{[0-9]+}} 0, i{{[0-9]+}}* % +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 +// IRBUILDER: store i8 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 +// IRBUILDER: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32**, [[iz]], [[iz]], i32**, [[iz]], [4 x i32]**)* [[SS_MICROTASK:@.+]] to void +// IRBUILDER: ret + // CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, [4 x i{{[0-9]+}}]* {{.+}}) -// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[E_PRIV:%.+]] = alloca [4 x i{{[0-9]+}}], -// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_PRIV]] -// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]] -// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]] +// IRBUILDER: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** %{{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, i32** %{{.+}}, [[iz]] {{.+}}, [4 x i{{[0-9]+}}]** {{.+}}) +// ALL: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[E_PRIV:%.+]] = alloca [4 x i{{[0-9]+}}], +// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_PRIV]] +// IRBUILDER-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32* +// IRBUILDER-64: store i32* [[A_CONV]], i32** [[REFA:%.+]], +// IRBUILDER-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]], +// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]] +// IRBUILDER-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV:%.+]] to i32* +// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]] +// IRBUILDER-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32* +// IRBUILDER-64: store i32* [[C_CONV]], i32** [[REFC:%.+]], +// IRBUILDER-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]], // CHECK-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32* // CHECK-64: [[B_CONV:%.+]] = bitcast i64* 
[[B_PRIV:%.+]] to i32* // CHECK-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32* @@ -461,26 +558,28 @@ // CHECK-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]], // CHECK-64: store i32* [[C_CONV]], i32** [[REFC:%.+]], // CHECK-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]], -// CHECK: bitcast [4 x i{{[0-9]+}}]* [[E_PRIV]] to i8* -// CHECK: bitcast [4 x i{{[0-9]+}}]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy -// CHECK: store [4 x i{{[0-9]+}}]* [[E_PRIV]], [4 x i{{[0-9]+}}]** [[REFE:%.+]], +// ALL: bitcast [4 x i{{[0-9]+}}]* [[E_PRIV]] to i8* +// ALL: bitcast [4 x i{{[0-9]+}}]* %{{.+}} to i8* +// ALL: call void @llvm.memcpy +// ALL: store [4 x i{{[0-9]+}}]* [[E_PRIV]], [4 x i{{[0-9]+}}]** [[REFE:%.+]], +// IRBUILDER: ret void +// IRBUILDER: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], // CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], -// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], -// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 -// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], -// CHECK-64-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_CONV]], -// CHECK-32-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], -// CHECK-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 -// CHECK-64-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_CONV]], -// CHECK-32-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], -// CHECK-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], -// CHECK-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], -// CHECK-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 -// CHECK-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], -// CHECK-NEXT: [[E_PRIV:%.+]] = load [4 x i{{[0-9]+}}]*, [4 x i{{[0-9]+}}]** [[REFE]], -// CHECK-NEXT: [[E_PRIV_2:%.+]] = getelementptr inbounds [4 x i{{[0-9]+}}], [4 x i{{[0-9]+}}]* [[E_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK-NEXT: store i32 1111, 
i32* [[E_PRIV_2]], +// ALL-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], +// ALL-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 +// ALL-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// ALL-64-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_CONV]], +// ALL-32-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], +// ALL-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 +// ALL-64-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_CONV]], +// ALL-32-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], +// ALL-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], +// ALL-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], +// ALL-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 +// ALL-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], +// ALL-NEXT: [[E_PRIV:%.+]] = load [4 x i{{[0-9]+}}]*, [4 x i{{[0-9]+}}]** [[REFE]], +// ALL-NEXT: [[E_PRIV_2:%.+]] = getelementptr inbounds [4 x i{{[0-9]+}}], [4 x i{{[0-9]+}}]* [[E_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// ALL-NEXT: store i32 1111, i32* [[E_PRIV_2]], // CHECK-NEXT: ret void // CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}}) diff --git a/clang/test/OpenMP/parallel_private_codegen.cpp b/clang/test/OpenMP/parallel_private_codegen.cpp --- a/clang/test/OpenMP/parallel_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_private_codegen.cpp @@ -1,8 +1,9 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,CHECK // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s -// 
RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,CHECK // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s @@ -91,12 +92,12 @@ } }; -// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 +// ALL: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 -// CHECK: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK: [[SST_TY:%.+]] = type { i{{[0-9]+}} } +// ALL: [[S_FLOAT_TY:%.+]] = type { float } +// ALL: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// ALL: [[SST_TY:%.+]] = type { i{{[0-9]+}} } template T tmain() { S test; @@ -273,63 +274,93 @@ #endif } -// CHECK: define i{{[0-9]+}} @main() -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void -// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() -// CHECK: call void [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* -// CHECK: ret +// ALL: define i{{[0-9]+}} @main() +// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// ALL: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// ALL: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void +// ALL: = call i{{.+}} [[TMAIN_INT:@.+]]() +// ALL: call void [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* +// ALL: ret // -// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// ALL: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// ALL: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// ALL: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// ALL: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: {{.+}}: +// IRBUILDER: %{{[0-9]+}} = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]] +// IRBUILDER: store i{{[0-9]+}} %{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR_REF:%.+]] +// ALL-NOT: [[T_VAR_PRIV]] +// ALL-NOT: [[VEC_PRIV]] +// ALL: {{.+}}: // CHECK: 
[[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) -// CHECK-NOT: [[T_VAR_PRIV]] -// CHECK-NOT: [[VEC_PRIV]] -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK-DAG: call void [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK-DAG: call void [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* -// CHECK: ret void +// IRBUILDER: {{.+}}: +// IRBUILDER: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* +// ALL: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) +// ALL-NOT: [[T_VAR_PRIV]] +// ALL-NOT: [[VEC_PRIV]] +// ALL: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// ALL-DAG: call void [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// ALL-DAG: call void [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* +// ALL: ret void -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void -// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* -// CHECK: ret -// -// CHECK: define {{.+}} @{{.+}}([[SS_TY]]* -// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* % -// CHECK: store i8 +// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]], +// ALL: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// ALL: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void +// ALL: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// ALL: ret + +// IRBUILDER: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128 +// IRBUILDER: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128 +// IRBUILDER: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128 +// IRBUILDER: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 +// IRBUILDER: %{{[0-9]+}} = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]] +// IRBUILDER: store i{{[0-9]+}} %{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR_REF:%.+]] +// IRBUILDER-NOT: [[T_VAR_PRIV]] +// IRBUILDER-NOT: [[VEC_PRIV]] +// IRBUILDER-NOT: [[SIVAR_PRIV]] +// IRBUILDER: {{.+}}: +// IRBUILDER: ret void +// IRBUILDER: {{.+}}: +// IRBUILDER: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]* +// IRBUILDER: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) +// IRBUILDER-NOT: [[T_VAR_PRIV]] +// IRBUILDER-NOT: [[VEC_PRIV]] +// IRBUILDER: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) +// IRBUILDER-DAG: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) +// IRBUILDER-DAG: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* + +// ALL: define {{.+}} @{{.+}}([[SS_TY]]* +// ALL: store i{{[0-9]+}} 0, i{{[0-9]+}}* % +// ALL: store i8 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*)* [[SS_MICROTASK:@.+]] to void -// CHECK: ret +// IRBUILDER: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[SS_MICROTASK:@.+]] to void +// ALL: ret // CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}) -// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], -// CHECK: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], +// IRBUILDER: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// ALL: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]], +// ALL: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]], +// IRBUILDER: {{.+}}: +// IRBUILDER: ret void // CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], -// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], -// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 -// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], -// CHECK-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], -// CHECK-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 -// CHECK-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], -// CHECK-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], -// CHECK-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], -// CHECK-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 -// CHECK-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], +// IRBUILDER: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]], +// ALL-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], +// ALL-NEXT: [[INC:%.+]] = add nsw 
i{{[0-9]+}} [[A_VAL]], 1 +// ALL-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// ALL-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]], +// ALL-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1 +// ALL-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]], +// ALL-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]], +// ALL-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]], +// ALL-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1 +// ALL-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]], // CHECK-NEXT: ret void // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) @@ -351,23 +382,30 @@ // CHECK-DAG: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* // CHECK: ret void -// CHECK: define {{.+}} @{{.+}}([[SST_TY]]* % -// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* % +// ALL: define {{.+}} @{{.+}}([[SST_TY]]* % +// ALL: store i{{[0-9]+}} 0, i{{[0-9]+}}* % // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SST_TY]]*)* [[SST_MICROTASK:@.+]] to void -// CHECK: ret +// IRBUILDER: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[SST_MICROTASK:@.+]] to void +// ALL: ret // CHECK: define internal void [[SST_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SST_TY]]* %{{.+}}) +// IRBUILDER: define internal void [[SST_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: [[GTID_ADDR_PTR:%.+]] = alloca i32*, // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_PTR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], -// CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 2 to i8*)) -// CHECK: [[A_PRIV:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32* -// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REF:%.+]], +// IRBUILDER: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// ALL: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 2 to i8*)) +// IRBUILDER: [[GTID2:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// ALL: [[A_PRIV:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32* +// ALL: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REF:%.+]], +// IRBUILDER: ret void // CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REF]], -// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], -// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 -// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], +// IRBUILDER: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REF]], +// ALL-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]], +// ALL-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1 +// ALL-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]], // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 2 to i8*)) +// IRBUILDER: call void @__kmpc_free(i32 [[GTID2]], i8* [[A_VOID_PTR]], 
i8* inttoptr (i64 2 to i8*)) // CHECK-NEXT: ret void #endif