diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1552,6 +1552,52 @@ const OMPExecutableDirective &, llvm::SmallVectorImpl &) {} +static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, + QualType FieldTy) { + auto *Field = FieldDecl::Create( + C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, + C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + DC->addDecl(Field); + return Field; +} + +Address CodeGenFunction::OMPBuilderCBHelpers::emitLastprivateConditionalInit( + CodeGenFunction &CGF, const VarDecl *VD) { + ASTContext &C = CGF.CGM.getContext(); + auto I = CGF.LastprivateConditionalToTypes.find(CGF.CurFn); + if (I == CGF.LastprivateConditionalToTypes.end()) + I = CGF.LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; + QualType NewType; + const FieldDecl *VDField; + const FieldDecl *FiredField; + LValue BaseLVal; + auto VI = I->getSecond().find(VD); + if (VI == I->getSecond().end()) { + RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); + RD->startDefinition(); + VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); + FiredField = addFieldToRecordDecl(C, RD, C.CharTy); + RD->completeDefinition(); + NewType = C.getRecordType(RD); + Address Addr = + CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); + BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); + I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); + } else { + NewType = std::get<0>(VI->getSecond()); + VDField = std::get<1>(VI->getSecond()); + FiredField = std::get<2>(VI->getSecond()); + BaseLVal = std::get<3>(VI->getSecond()); + } + LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), + FiredLVal); + return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); +} + bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPCopyinClause( CodeGenFunction &CGF, const OMPExecutableDirective &D, InsertPointTy AllocaIP) { @@ -1728,6 +1774,293 @@ } return OS.str().str(); } + +bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPFirstprivateClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + OMPPrivateScope &PrivateScope, + llvm::SmallDenseMap + &CapturedVarsInfoMap) { + if (!CGF.HaveInsertPoint()) + return false; + + CodeGenModule &CGM = CGF.CGM; + bool DeviceConstTarget = + CGF.getLangOpts().OpenMPIsDevice && + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); + bool FirstprivateIsLastprivate = false; + llvm::DenseMap Lastprivates; + for (const auto *C : D.getClausesOfKind()) { + for (const auto *D : C->varlists()) + Lastprivates.try_emplace( + cast(cast(D)->getDecl())->getCanonicalDecl(), + C->getKind()); + } + llvm::DenseSet EmittedAsFirstprivate; + llvm::SmallVector CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); + // Force emission of the firstprivate copy if the directive does not emit + // outlined function, like omp for, omp simd, omp distribute etc. 
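+  // (Such directives report a single OMPD_unknown capture region, so there is
+  // no outlined function whose arguments could carry the private copies.)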
+ bool MustEmitFirstprivateCopy = + CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; + for (const auto *C : D.getClausesOfKind()) { + const auto *IRef = C->varlist_begin(); + const auto *InitsRef = C->inits().begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast(cast(*IRef)->getDecl()); + bool ThisFirstprivateIsLastprivate = + Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; + const FieldDecl *FD = CGF.CapturedStmtInfo->lookup(OrigVD); + const auto *VD = cast(cast(IInit)->getDecl()); + if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && + !FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr())) { + if (CapturedVarsInfoMap[OrigVD].CapturedKind != + CapturedVarInfo::ByValue || + OrigVD->isConstexpr()) { + EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); + ++IRef; + ++InitsRef; + continue; + } + } + // Do not emit copy for firstprivate constant variables in target regions, + // captured by reference. + if (DeviceConstTarget && OrigVD->getType().isConstant(CGF.getContext()) && + FD && FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr())) { + // TODO: Move and modify this function based on target regions after + // they land + (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, + OrigVD); + ++IRef; + ++InitsRef; + continue; + } + FirstprivateIsLastprivate = + FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; + if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { + const auto *VDInit = + cast(cast(*InitsRef)->getDecl()); + bool IsRegistered; + DeclRefExpr DRE(CGF.getContext(), const_cast(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + LValue OriginalLVal; + if (!FD) { + // Check if the firstprivate variable is just a constant value. + ConstantEmission CE = CGF.tryEmitAsConstant(&DRE); + if (CE && !CE.isReference()) { + // Constant value, no need to create a copy. + ++IRef; + ++InitsRef; + continue; + } + if (CE && CE.isReference()) { + OriginalLVal = CE.getReferenceLValue(CGF, &DRE); + } else { + assert(!CE && "Expected non-constant firstprivate."); + OriginalLVal = CGF.EmitLValue(&DRE); + } + } else { + OriginalLVal = CGF.EmitLValue(&DRE); + } + QualType Type = VD->getType(); + if (Type->isArrayType()) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + IsRegistered = PrivateScope.addPrivate( + OrigVD, [&CGF, VD, Type, OriginalLVal, VDInit]() { + AutoVarEmission Emission = CGF.EmitAutoVarAlloca(*VD); + const Expr *Init = VD->getInit(); + if (!isa(Init) || + CGF.isTrivialInitializer(Init)) { + // Perform simple memcpy. + LValue Dest = + CGF.MakeAddrLValue(Emission.getAllocatedAddress(), Type); + CGF.EmitAggregateAssign(Dest, OriginalLVal, Type); + } else { + CGF.EmitOMPAggregateAssign( + Emission.getAllocatedAddress(), + OriginalLVal.getAddress(CGF), Type, + [&CGF, VDInit, Init](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the + // initialization. + RunCleanupsScope InitScope(CGF); + // Emit initialization for single element. 
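+                    // VDInit is temporarily mapped to the source element so
+                    // the initializer can read the original value; the
+                    // mapping is erased from LocalDeclMap again below.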
+ CGF.setAddrOfLocalVar(VDInit, SrcElement); + CGF.EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer*/ false); + CGF.LocalDeclMap.erase(VDInit); + }); + } + CGF.EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); + } else { + Address OriginalAddr = OriginalLVal.getAddress(CGF); + IsRegistered = PrivateScope.addPrivate( + OrigVD, [&CGF, VDInit, OriginalAddr, VD, + ThisFirstprivateIsLastprivate, OrigVD, &Lastprivates, + IRef, &IInit, &CGM, &CapturedVarsInfoMap, &FD]() { + Address VarAddr = OriginalAddr; + ASTContext &Ctx = CGF.getContext(); + QualType UIntPtrTy = Ctx.getUIntPtrType(); + llvm::Value *V = CapturedVarsInfoMap[OrigVD].PassedValue; + if (CapturedVarsInfoMap[OrigVD].CapturedKind == + CapturedVarInfo::ByValue) { + CGF.setAddrOfLocalVar(VDInit, OriginalAddr); + CharUnits AddrAlign = Ctx.getDeclAlign(&*VD); + Address DeclPtr = CGF.CreateMemTemp(UIntPtrTy, AddrAlign, + VD->getName() + ".addr"); + LValue DstLV = CGF.MakeAddrLValue(DeclPtr, UIntPtrTy, + AlignmentSource::Decl); + CGF.EmitStoreOfScalar(V, DstLV); + if (!OrigVD->getType()->isPointerType()) { + VarAddr = DeclPtr; + if (VD->getType() != UIntPtrTy) + VarAddr = castValueFromUintptr( + CGF, (*IRef)->getExprLoc(), VD->getType(), + VD->getName(), + CGF.MakeAddrLValue(DeclPtr, UIntPtrTy)); + Address OMPAddress = getAddressOfLocalVariable(CGF, VD); + if (OMPAddress.isValid()) { + LValue VarAddrLV = CGF.MakeAddrLValue( + VarAddr, VD->getType(), AlignmentSource::Decl); + llvm::Value *CV = + CGF.EmitLoadOfScalar(VarAddrLV, IInit->getBeginLoc()); + CGF.EmitStoreOfScalar( + CV, CGF.MakeAddrLValue(OMPAddress, VD->getType(), + AlignmentSource::Decl)); + VarAddr = OMPAddress; + } + CGF.setAddrOfLocalVar(VD, VarAddr); + } else { + llvm_unreachable("Unhandled Captured by Value VarDecl!"); + } + } else { + QualType VDPtrTy = Ctx.getPointerType(VD->getType()); + CharUnits AddrAlign = Ctx.getDeclAlign(&*VD); + Address DeclPtr = CGF.CreateMemTemp(VDPtrTy, AddrAlign, + VD->getName() + ".addr"); + LValue DstLV = CGF.MakeAddrLValue(DeclPtr, VDPtrTy, + AlignmentSource::Decl); + CGF.EmitStoreOfScalar(V, DstLV); + llvm::Value *PtrLd = + CGF.EmitLoadOfScalar(DstLV, IInit->getBeginLoc()); + VarAddr = Address(PtrLd, AddrAlign); + if (VD->getType() != VDPtrTy) + VarAddr = castValueFromUintptr( + CGF, (*IRef)->getExprLoc(), VD->getType(), + VD->getName(), CGF.MakeAddrLValue(VarAddr, VDPtrTy)); + CGF.setAddrOfLocalVar(VD, VarAddr); + CGF.setAddrOfLocalVar(VDInit, VarAddr); + const auto *cleanups = + dyn_cast(VD->getInit()); + const Expr *Init = + (cleanups) ? cleanups->getSubExpr() : VD->getInit(); + if (isa(Init) && + !CGF.isTrivialInitializer(Init)) { + if (cleanups) { + CGF.enterFullExpression(cleanups); + RunCleanupsScope InitScope(CGF); + } + + Address DstPtr = CGF.CreateMemTemp(VD->getType(), AddrAlign, + VD->getName()); + CGF.EmitAnyExprToMem(Init, DstPtr, + Init->getType().getQualifiers(), + /*IsInitializer*/ false); + CGF.LocalDeclMap.erase(VDInit); + VarAddr = DstPtr; + // TODO emit cleanup info for variable + } + } + + CGF.LocalDeclMap.erase(VDInit); + if (ThisFirstprivateIsLastprivate && + Lastprivates[OrigVD->getCanonicalDecl()] == + OMPC_LASTPRIVATE_conditional) { + // Create/init special variable for lastprivate conditionals. 
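+                    // The private copy is allocated inside an implicit record
+                    // next to a 'Fired' flag; see
+                    // emitLastprivateConditionalInit above.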
+ Address VDAddr = Address::invalid(); + VDAddr = emitLastprivateConditionalInit(CGF, OrigVD); + llvm::Value *V = CGF.EmitLoadOfScalar( + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), + (*IRef)->getType(), + AlignmentSource::Decl), + (*IRef)->getExprLoc()); + CGF.EmitStoreOfScalar( + V, CGF.MakeAddrLValue(VDAddr, (*IRef)->getType(), + AlignmentSource::Decl)); + CGF.LocalDeclMap.erase(VD); + CGF.setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } + return CGF.GetAddrOfLocalVar(VD); + }); + } + assert(IsRegistered && + "firstprivate var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } + ++IRef; + ++InitsRef; + } + } + return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); +} + +void CodeGenFunction::OMPBuilderCBHelpers::GenerateOpenMPCapturedVars( + CodeGenFunction &CGF, const CapturedStmt &S, + llvm::SmallDenseMap + &CapturedVarsValueMap) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + auto CurField = RD->field_begin(); + auto CurCap = S.captures().begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField, ++CurCap) { + if (CurField->hasCapturedVLAType() || CurCap->capturesThis()) { + // do nothing + } else if (CurCap->capturesVariableByCopy()) { + llvm::Value *CV = + CGF.EmitLoadOfScalar(CGF.EmitLValue(*I), CurCap->getLocation()); + + // If the field is not a pointer, we need to save the actual value + // and load it as a void pointer. + if (!CurField->getType()->isAnyPointerType()) { + ASTContext &Ctx = CGF.getContext(); + Address DstAddr = CGF.CreateMemTemp( + Ctx.getUIntPtrType(), + Twine(CurCap->getCapturedVar()->getName(), ".casted")); + LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); + + llvm::Value *SrcAddrVal = CGF.EmitScalarConversion( + DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), + Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); + LValue SrcLV = + CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); + + // Store the value using the source type pointer. + CGF.EmitStoreThroughLValue(RValue::get(CV), SrcLV); + + // Load the value using the destination type pointer. + CV = CGF.EmitLoadOfScalar(DstLV, CurCap->getLocation()); + } + CapturedVarsValueMap[CurCap->getCapturedVar()] = { + CV, CapturedVarInfo::ByValue}; + } else { + assert(CurCap->capturesVariable() && "Expected capture by reference."); + CapturedVarsValueMap[CurCap->getCapturedVar()] = { + CGF.EmitLValue(*I).getAddress(CGF).getPointer(), + CapturedVarInfo::ByRef}; + } + } +} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { // Check if we have any if clause associated with the directive. @@ -1832,6 +2165,7 @@ CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel())); @@ -3526,9 +3860,6 @@ if (const auto *HintClause = S.getSingleClause()) Hint = HintClause->getHint(); - // TODO: This is slightly different from what's currently being done in - // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything - // about typing is final. 
   llvm::Value *HintInst = nullptr;
   if (Hint)
     HintInst =
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1567,6 +1567,14 @@
     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
 
+    struct CapturedVarInfo {
+
+      enum CapturedVarKind { ByValue, ByRef };
+
+      llvm::Value *PassedValue;
+      CapturedVarKind CapturedKind;
+    };
+
     /// Cleanup action for allocate support.
     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
 
@@ -1598,9 +1606,35 @@
     /// Gets the OpenMP-specific address of the local variable \p VD.
     static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
                                              const VarDecl *VD);
+
+    /// Emit the firstprivate clause.
+    ///
+    /// \param CGF CodeGenFunction for the function containing the associated
+    ///        OMP directive.
+    /// \param D The directive the firstprivate clause is associated with.
+    /// \param PrivateScope Private scope for all captured variables in the
+    ///        current associated directive.
+    /// \param CapturedVarsInfoMap Map of captured variables and their
+    ///        generated associated values.
+    /// \return True if any variables were generated.
+    static bool EmitOMPFirstprivateClause(
+        CodeGenFunction &CGF, const OMPExecutableDirective &D,
+        OMPPrivateScope &PrivateScope,
+        llvm::SmallDenseMap<const VarDecl *, CapturedVarInfo>
+            &CapturedVarsInfoMap);
+
     static bool EmitOMPCopyinClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     InsertPointTy AllocaIP);
+
+    /// Create specialized alloca to handle lastprivate conditionals.
+    static Address emitLastprivateConditionalInit(CodeGenFunction &CGF,
+                                                  const VarDecl *VD);
+
+    static void GenerateOpenMPCapturedVars(
+        CodeGenFunction &CGF, const CapturedStmt &S,
+        llvm::SmallDenseMap<const VarDecl *, CapturedVarInfo>
+            &CapturedVarsInfoMap);
+
     /// Get the platform-specific name separator.
     /// \param Parts different parts of the final name that needs separation
     /// \param FirstSeparator First separator used between the initial two
@@ -1610,6 +1644,7 @@
     static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                              StringRef FirstSeparator = ".",
                                              StringRef Separator = ".");
+
     /// Emit the Finalization for an OMP region
     /// \param CGF The Codegen function this belongs to
     /// \param IP Insertion point for generating the finalization code.
@@ -1713,7 +1748,16 @@
       ~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
     };
   };
+
 private:
+  /// Maps local variables marked as lastprivate conditional to their internal
+  /// types.
+  llvm::DenseMap<llvm::Function *,
+                 llvm::DenseMap<const VarDecl *,
+                                std::tuple<QualType, const FieldDecl *,
+                                           const FieldDecl *, LValue>>>
+      LastprivateConditionalToTypes;
+
   /// CXXThisDecl - When generating code for a C++ member function,
   /// this will hold the implicit 'this' declaration.
ImplicitParamDecl *CXXABIThisDecl = nullptr; diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -1,8 +1,11 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-32,CHECK,CHECK-32 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-32,CHECK,CHECK-32 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-32 %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-32 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-32,IRBUILDER,IRBUILDER-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-32,IRBUILDER,IRBUILDER-32 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s @@ -11,11 +14,14 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-64,CHECK,CHECK-64 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-64,CHECK,CHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-64 %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-64 %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s 
-check-prefixes=ALL,ALL-64,IRBUILDER,IRBUILDER-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-64,IRBUILDER,IRBUILDER-64 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s @@ -62,6 +68,7 @@ int e[4]; SS(int &d) : a(0), b(0), c(d) { #pragma omp parallel firstprivate(a, b, c, e) + #ifdef LAMBDA [&]() { ++this->a, --b, (this)->c /= 1; @@ -119,12 +126,12 @@ ~S() {} }; -// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 +// ALL: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 -// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } +// ALL-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// ALL-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// ALL-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } template T tmain() { @@ -342,43 +349,72 @@ #endif } -// CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: [[T_VAR:%.+]] = alloca i32, -// CHECK: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]], -// CHECK: [[SIVARCAST:%.+]] = alloca [[iz]], -// CHECK: [[A:%.+]] = alloca i32, -// CHECK: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]], -// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], -// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32* -// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], -// CHECK-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST]], -// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST]], -// CHECK: [[SIVARVAL:%.+]] = load i32, i32* @{{.+}}, -// CHECK-64: [[SIVARCONV:%.+]] = bitcast i64* [[SIVARCAST]] to i32* -// CHECK-64: store i32 [[SIVARVAL]], i32* [[SIVARCONV]], -// CHECK-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]], -// CHECK: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]], +// ALL: define {{.*}}i{{[0-9]+}} @main() +// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// ALL: [[T_VAR:%.+]] = alloca i32, +// ALL: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]], +// ALL: [[SIVARCAST:%.+]] = alloca [[iz]], +// ALL: [[A:%.+]] = alloca i32, +// ALL: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]], +// ALL: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// ALL: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], +// ALL-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32* +// ALL-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], +// ALL-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST]], +// ALL: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST]], +// ALL: [[SIVARVAL:%.+]] = load i32, i32* @{{.+}}, +// ALL-64: [[SIVARCONV:%.+]] = bitcast i64* [[SIVARCAST]] to i32* +// ALL-64: store i32 [[SIVARVAL]], i32* [[SIVARCONV]], +// ALL-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]], +// ALL: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]], // CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, [[iz]], [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]] -// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], -// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32* -// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], -// CHECK-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]], -// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]], -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]]) -// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() -// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* -// CHECK: ret +// IRBUILDER: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]], [2 x i32]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}{{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]] +// ALL: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]], +// ALL-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32* +// ALL-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], +// ALL-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]], +// ALL: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]], +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]]) +// ALL: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() +// ALL: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* +// ALL: ret +// +// IRBUILDER: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]]) +// IRBUILDER: [[GTID_LOCAL:%tid.addr.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// IRBUILDER: store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]], +// IRBUILDER-64: [[BC:%.+]] = bitcast [[iz]]* [[T_VAR_ADDR]] to i32* +// IRBUILDER: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: [[T_VAR_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], [[iz]] 4, i8* inttoptr ([[iz]] 1 to i8*)) +// IRBUILDER: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: [[T_VAR_PRIV:%.+]] = bitcast i8* [[T_VAR_VOID_PTR]] to i32* +// IRBUILDER-32: [[T_VAR_VAL:%.+]] = load i32, i32* [[T_VAR_ADDR]], +// IRBUILDER-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]], +// IRBUILDER: store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]], +// IRBUILDER: ret void +// IRBUILDER: store i32 0, i32* [[T_VAR_PRIV]], +// IRBUILDER: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*)) // // CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i32]* nonnull align 4 dereferenceable(8) %{{.+}}, [[iz]] {{.*}}%{{.+}}, [2 x [[S_FLOAT_TY]]]* nonnull align 4 dereferenceable(8) %{{.+}}, [[S_FLOAT_TY]]* nonnull align 4 dereferenceable(4) %{{.+}}, [[iz]] {{.*}}[[SIVAR:%.+]]) -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// IRBUILDER: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] {{.*}}%{{.+}}, [2 x i32]* [[VEC_REF:%.+]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF:%.+]], [[S_FLOAT_TY]]* %{{.+}}, [[iz]] {{.*}}[[SIVAR:%.+]]) +// ALL: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[SIVAR7_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], -// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], -// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], +// ALL: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// ALL: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// IRBUILDER: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// ALL: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// IRBUILDER: [[SIVAR7_PRIV:%.+]] = alloca i{{[0-9]+}}, +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// IRBUILDER-64: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_PRIV]] to i32* +// IRBUILDER: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// IRBUILDER: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** % // CHECK-NOT: load 
i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK-64: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_PRIV]] to i32* @@ -386,31 +422,35 @@ // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % // CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32* + // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* -// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], -// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]* -// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] -// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] -// CHECK: [[S_ARR_BODY]] -// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] -// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) +// ALL: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]], +// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]* +// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] +// IRBUILDER: ret void +// ALL: [[S_ARR_BODY]] +// ALL: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) +// ALL: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]]) +// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// IRBUILDER: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % +// ALL: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32* // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK-64: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_CONV]], -// CHECK-32: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_PRIV]], +// ALL-64: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_CONV]], +// ALL-32: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_PRIV]], +// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) // CHECK-DAG: call {{.*}} 
[[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) -// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* +// ALL-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* // CHECK: ret void - // CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]]) // CHECK: [[GTID_ADDR:%.+]] = alloca i32*, // CHECK: store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]], @@ -426,13 +466,13 @@ // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*)) // CHECK: ret void - -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]], +// ALL: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) // CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void -// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* -// CHECK: ret +// IRBUILDER: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void +// ALL: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// ALL: ret i{{[0-9]+}} // // CHECK: define {{.+}} @{{.+}}([[SS_TY]]* // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 @@ -446,14 +486,70 @@ // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, [[iz]], [[iz]], [[iz]], [4 x i32]*)* [[SS_MICROTASK:@.+]] to void // CHECK: ret +// IRBUILDER: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* %{{.+}}, [2 x i32]* %{{.+}}, [2 x [[S_INT_TY]]]* %{{.+}}, [[S_INT_TY]]* %{{.+}}) +// IRBUILDER: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}*, align 128 +// IRBUILDER: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128 +// IRBUILDER: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128 +// IRBUILDER: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128 +// IRBUILDER: [[GTID_LOCAL:%tid.addr.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// IRBUILDER: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % +// IRBUILDER: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// IRBUILDER: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* +// IRBUILDER: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i1 +// IRBUILDER: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// IRBUILDER: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]* +// IRBUILDER: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// IRBUILDER: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// IRBUILDER: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] + +// IRBUILDER: ret void + +// IRBUILDER: [[S_ARR_BODY]] +// IRBUILDER: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// IRBUILDER: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// IRBUILDER: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % +// IRBUILDER: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) +// IRBUILDER: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]], +// IRBUILDER: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* +// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) +// IRBUILDER-NOT: call {{.*}}void @__kmpc_barrier( + +// IRBUILDER-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* + +// IRBUILDER: define {{.+}} @{{.+}}([[SS_TY]]* +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 +// IRBUILDER: store i{{[0-9]+}} 0, i{{[0-9]+}}* % +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 +// IRBUILDER: store i8 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1 +// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2 +// IRBUILDER: call void (%{{.+}}*, 
i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32**, [[iz]], [[iz]], i32**, [[iz]], [4 x i32]**)* [[SS_MICROTASK:@.+]] to void +// IRBUILDER: ret + // CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, [4 x i{{[0-9]+}}]* {{.+}}) -// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: [[E_PRIV:%.+]] = alloca [4 x i{{[0-9]+}}], -// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_PRIV]] -// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]] -// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]] +// IRBUILDER: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** %{{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, i32** %{{.+}}, [[iz]] {{.+}}, [4 x i{{[0-9]+}}]** {{.+}}) +// ALL: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, +// ALL: [[E_PRIV:%.+]] = alloca [4 x i{{[0-9]+}}], +// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_PRIV]] +// IRBUILDER-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32* +// IRBUILDER-64: store i32* [[A_CONV]], i32** [[REFA:%.+]], +// IRBUILDER-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]], +// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]] +// IRBUILDER-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV:%.+]] to i32* +// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]] +// IRBUILDER-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32* +// IRBUILDER-64: store i32* [[C_CONV]], i32** [[REFC:%.+]], +// IRBUILDER-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]], // CHECK-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32* // CHECK-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV:%.+]] to i32* // CHECK-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32*