diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1184,51 +1184,6 @@ return UDRMap.lookup(D); } -namespace { -// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR -// Builder if one is present. -struct PushAndPopStackRAII { - PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, - bool HasCancel, llvm::omp::Directive Kind) - : OMPBuilder(OMPBuilder) { - if (!OMPBuilder) - return; - - // The following callback is the crucial part of clangs cleanup process. - // - // NOTE: - // Once the OpenMPIRBuilder is used to create parallel regions (and - // similar), the cancellation destination (Dest below) is determined via - // IP. That means if we have variables to finalize we split the block at IP, - // use the new block (=BB) as destination to build a JumpDest (via - // getJumpDestInCurrentScope(BB)) which then is fed to - // EmitBranchThroughCleanup. Furthermore, there will not be the need - // to push & pop an FinalizationInfo object. - // The FiniCB will still be needed but at the point where the - // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. - auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { - assert(IP.getBlock()->end() == IP.getPoint() && - "Clang CG should cause non-terminated block!"); - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.restoreIP(IP); - CodeGenFunction::JumpDest Dest = - CGF.getOMPCancelDestination(OMPD_parallel); - CGF.EmitBranchThroughCleanup(Dest); - }; - - // TODO: Remove this once we emit parallel regions through the - // OpenMPIRBuilder as it can do this setup internally. 
- llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); - OMPBuilder->pushFinalizationCB(std::move(FI)); - } - ~PushAndPopStackRAII() { - if (OMPBuilder) - OMPBuilder->popFinalizationCB(); - } - llvm::OpenMPIRBuilder *OMPBuilder; -}; -} // namespace - static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1256,10 +1211,6 @@ dyn_cast(&D)) HasCancel = OPFD->hasCancel(); - // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new - // parallel region to make cancellation barriers work properly. - llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1540,6 +1540,8 @@ CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters) { + CodeGenFunction::NonOpenMPIRBuilderRegion NonBuilderScope(CGF); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); llvm::Value *NumThreads = nullptr; llvm::Function *OutlinedFn = @@ -1713,7 +1715,8 @@ } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder && + !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Check if we have any if clause associated with the directive. 
llvm::Value *IfCond = nullptr; @@ -3729,6 +3732,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF, const OMPLoopDirective &S, bool HasCancel) { + CodeGenFunction::NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); + bool HasLastprivates; if (llvm::any_of(S.getClausesOfKind(), [](const OMPReductionClause *C) { @@ -3906,6 +3911,8 @@ } void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); + const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); const auto *CS = dyn_cast(CapturedStmt); bool HasLastprivates = false; @@ -4121,7 +4128,8 @@ } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder && + !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; @@ -4132,8 +4140,14 @@ auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); + Builder.restoreIP(CodeGenIP); + llvm::BasicBlock *FiniBB = + splitBBWithSuffix(Builder, false, ".sectionfini"); + + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, *FiniBB); + EmitStmt(SectionRegionBodyStmt); + + Builder.CreateBr(FiniBB); }; LexicalScope Scope(*this, S.getSourceRange()); @@ -4485,6 +4499,8 @@ const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data) { + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); + // Emit outlined function for task construct. 
const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); auto I = CS->getCapturedDecl()->param_begin(); @@ -5027,6 +5043,8 @@ } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); + // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); Address CapturedStruct = GenerateCapturedStmtArgument(*CS); @@ -5044,6 +5062,7 @@ // Check if we should emit tied or untied task. Data.Tied = !S.getSingleClause(); auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, @@ -6910,20 +6929,14 @@ break; } } - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder && + !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - // TODO: This check is necessary as we only generate `omp parallel` through - // the OpenMPIRBuilder for now. 
- if (S.getCancelRegion() == OMPD_parallel || - S.getCancelRegion() == OMPD_sections || - S.getCancelRegion() == OMPD_section) { - llvm::Value *IfCondition = nullptr; - if (IfCond) - IfCondition = EmitScalarExpr(IfCond, - /*IgnoreResultAssign=*/true); - return Builder.restoreIP( - OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); - } + llvm::Value *IfCondition = nullptr; + if (IfCond) + IfCondition = EvaluateExprAsBool(IfCond); + return Builder.restoreIP( + OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); } CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -489,6 +489,32 @@ ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; + /// While in a region handled by Clang's CGOpenMPRuntime, do not use the + /// OpenMPIRBuilder which requires all surrounding regions to be handled by + /// OpenMPIRBuilder as well. + /// + /// Required until everything can be handled by OpenMPIRBuilder. + /// Isn't the ultimate solution to mixing OpenMPIRBuilder and + /// non-OpenMPIRBuilder codegen either, but works with the current regression + /// tests so far. + bool IsInsideNonOpenMPIRBuilderHandledRegion = false; + class NonOpenMPIRBuilderRegion { + private: + CodeGenFunction &CGF; + bool PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + + public: + NonOpenMPIRBuilderRegion(CodeGenFunction &CGF) + : CGF(CGF), PreviousIsInsideNonOpenMPIRBuilderHandledRegion( + CGF.IsInsideNonOpenMPIRBuilderHandledRegion) { + CGF.IsInsideNonOpenMPIRBuilderHandledRegion = true; + } + ~NonOpenMPIRBuilderRegion() { + CGF.IsInsideNonOpenMPIRBuilderHandledRegion = + PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + } + }; + /// An abstract representation of regular/ObjC call/message targets. class AbstractCallee { /// The function declaration of the callee. 
@@ -1776,16 +1802,11 @@ /// \param IP Insertion point for generating the finalization code. static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) { CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - assert(IP.getBlock()->end() != IP.getPoint() && - "OpenMP IR Builder should cause terminated block!"); - llvm::BasicBlock *IPBB = IP.getBlock(); - llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); - assert(DestBB && "Finalization block should have one successor!"); + CGF.Builder.restoreIP(IP); + llvm::BasicBlock *DestBB = + llvm::splitBB(CGF.Builder, /*CreateBranch*/ false, ".ompfinalize"); - // erase and replace with cleanup branch. - IPBB->getTerminator()->eraseFromParent(); - CGF.Builder.SetInsertPoint(IPBB); CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB); CGF.EmitBranchThroughCleanup(Dest); } diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1329,20 +1329,20 @@ // CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, 
align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -1365,8 +1365,8 @@ // CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -1386,11 +1386,13 @@ // CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: omp_section_loop.body.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.split: +// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK3: omp_section_loop.body.case.cont: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -1400,97 +1402,109 @@ // CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK3: omp_section_loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// 
CHECK3: omp_section_loop.aftersections.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK3: omp_section_loop.preheader13: -// CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK3-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK3: section_finish: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK3: .ompfinalize15: +// CHECK3-NEXT: br label [[SECTION_FINI]] +// CHECK3: section_fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK3: omp_section_loop.preheader16: +// CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[P_STRIDE33]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND31]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label 
[[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK3: omp_section_loop.header14: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK3: omp_section_loop.cond15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK3: omp_section_loop.body16: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK3: omp_section_loop.header17: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK3: omp_section_loop.cond18: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK3: omp_section_loop.body19: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case23: 
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case23.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case23.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case25: +// CHECK3: omp_section_loop.body.case26: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case26.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK3: omp_section_loop.body.case26.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case26.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case28: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 
[[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case25.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after26: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body16.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK3: omp_section_loop.inc17: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK3: omp_section_loop.exit18: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK3: omp_section_loop.after19: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.after19sections.fini: +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case28.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI37]] +// CHECK3: omp_section_loop.body.case28.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK3: omp_section_loop.body.case28.sectionfini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case28.section.after: +// CHECK3-NEXT: br label 
[[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body19.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK3: omp_section_loop.inc20: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK3: omp_section_loop.exit21: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK3: omp_section_loop.after22: +// CHECK3-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK3: section_finish36: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK3: .ompfinalize38: +// CHECK3-NEXT: br label [[SECTION_FINI37]] +// CHECK3: section_fini37: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: 
[[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK3-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -1504,29 +1518,23 @@ // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK3-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: omp_section_loop.body.case.cncl: 
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK3: omp_section_loop.body.case23.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK3: omp_section_loop.body.case25.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -1537,30 +1545,30 @@ // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // 
CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 @@ -1584,50 +1592,58 @@ // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK3: 3: -// CHECK3-NEXT: br label [[TMP4:%.*]] -// CHECK3: 4: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK3: omp.par.region1: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label 
[[DOTCNCL5:%.*]] -// CHECK3: .cncl5: +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK3: omp.par.region1.cncl: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK3: .ompfinalize11: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK3: omp.par.region1.cncl.finisplit: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK3: .cont: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK3: omp.par.region1.cont: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP8]] +// CHECK3-NEXT: 
[[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK3-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: 14: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK3: .cncl: +// CHECK3: omp.par.region.if: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK3: omp.par.region.if.cncl: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK3: .ompfinalize9: +// CHECK3-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK3: omp.par.region.if.cncl.finisplit: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: .split: -// CHECK3-NEXT: br label [[TMP4]] +// CHECK3: 
omp.par.region.if.cont: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1716,14 +1732,14 @@ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1732,13 +1748,13 @@ // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: 
ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // @@ -1759,7 +1775,7 @@ // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -1780,25 +1796,23 @@ // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: 
[[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK3: .omp.sections.case2.split: -// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK3: .omp.sections.case2.section.after: +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK3: .cancel.exit4: +// CHECK3-NEXT: br label [[CANCEL_EXIT]] +// CHECK3: .cancel.continue5: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2.cncl: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1807,14 +1821,14 @@ // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 
[[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1861,7 +1875,7 @@ // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -1889,7 +1903,7 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -1911,14 +1925,14 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* 
@.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -1928,10 +1942,10 @@ // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: @@ -1981,20 +1995,20 @@ // CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca 
i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -2017,8 +2031,8 @@ // CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK4-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -2038,11 +2052,13 @@ // CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK4-NEXT: 
] // CHECK4: omp_section_loop.body.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.split: +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK4: omp_section_loop.body.case.cont: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -2052,97 +2068,109 @@ // CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK4: omp_section_loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK4: omp_section_loop.aftersections.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK4: omp_section_loop.preheader13: -// CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK4-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK4: section_finish: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK4: .ompfinalize15: +// CHECK4-NEXT: br label [[SECTION_FINI]] +// CHECK4: section_fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK4: omp_section_loop.preheader16: +// CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[P_STRIDE33]], align 4 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[P_LOWERBOUND31]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK4: omp_section_loop.header14: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK4: omp_section_loop.cond15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK4: omp_section_loop.body16: -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK4: omp_section_loop.header17: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK4: omp_section_loop.cond18: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK4: omp_section_loop.body19: +// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK4-NEXT: switch i32 [[TMP15]], label 
[[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK4-NEXT: ] -// CHECK4: omp_section_loop.body.case23: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case23.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case23.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body.case25: +// CHECK4: omp_section_loop.body.case26: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case26.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK4: omp_section_loop.body.case26.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case26.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case28: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] 
= call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case25.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after26: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body16.sections.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK4: omp_section_loop.inc17: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK4: omp_section_loop.exit18: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK4: omp_section_loop.after19: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK4: omp_section_loop.after19sections.fini: +// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case28.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI37]] +// CHECK4: omp_section_loop.body.case28.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK4: 
omp_section_loop.body.case28.sectionfini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case28.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body19.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK4: omp_section_loop.inc20: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK4: omp_section_loop.exit21: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK4: omp_section_loop.after22: +// CHECK4-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK4: section_finish36: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK4: .ompfinalize38: +// CHECK4-NEXT: br label [[SECTION_FINI37]] +// CHECK4: section_fini37: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // 
CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK4-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // 
CHECK4: cond.false: // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -2156,29 +2184,23 @@ // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK4-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK4-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK4-NEXT: br 
i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: omp_section_loop.body.case.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK4: omp_section_loop.body.case23.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK4: omp_section_loop.body.case25.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -2189,30 +2211,30 @@ // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB10]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: store i32 0, i32* [[R]], align 4 @@ -2236,50 +2258,58 @@ // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK4: 3: -// CHECK4-NEXT: br label [[TMP4:%.*]] -// CHECK4: 4: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK4: omp.par.region1: +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK4-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label 
[[DOTCNCL5:%.*]] -// CHECK4: .cncl5: +// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK4-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK4: omp.par.region1.cncl: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK4: .ompfinalize11: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK4: omp.par.region1.cncl.finisplit: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK4: .cont: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK4: omp.par.region1.cont: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK4-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP8]] +// CHECK4-NEXT: 
[[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK4-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: 14: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK4: .cncl: +// CHECK4: omp.par.region.if: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK4-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK4: omp.par.region.if.cncl: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK4: .ompfinalize9: +// CHECK4-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK4: omp.par.region.if.cncl.finisplit: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: .split: -// CHECK4-NEXT: br label [[TMP4]] +// CHECK4: 
omp.par.region.if.cont: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -2368,14 +2398,14 @@ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2384,13 +2414,13 @@ // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: 
ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // @@ -2411,7 +2441,7 @@ // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -2432,25 +2462,23 @@ // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: 
[[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.case2: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK4: .omp.sections.case2.split: -// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK4: .omp.sections.case2.section.after: +// CHECK4-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK4: .cancel.exit4: +// CHECK4-NEXT: br label [[CANCEL_EXIT]] +// CHECK4: .cancel.continue5: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2.cncl: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2459,14 +2487,14 @@ // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 
[[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // // @@ -2513,7 +2541,7 @@ // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -2541,7 +2569,7 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK4-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -2563,14 +2591,14 @@ // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* 
@.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2580,10 +2608,10 @@ // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.reduction.case2: @@ -3873,20 +3901,20 @@ // CHECK9-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca 
i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -3909,8 +3937,8 @@ // CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK9-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -3930,11 +3958,13 @@ // CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK9-NEXT: 
] // CHECK9: omp_section_loop.body.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.split: +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK9: omp_section_loop.body.case.cont: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -3944,97 +3974,109 @@ // CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK9: omp_section_loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK9: omp_section_loop.aftersections.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK9: omp_section_loop.preheader13: -// CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK9-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK9: section_finish: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK9: .ompfinalize15: +// CHECK9-NEXT: br label [[SECTION_FINI]] +// CHECK9: section_fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK9: omp_section_loop.preheader16: +// CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK9-NEXT: store i32 1, i32* [[P_STRIDE33]], align 4 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[P_LOWERBOUND31]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK9: omp_section_loop.header14: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK9: omp_section_loop.cond15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK9: omp_section_loop.body16: -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK9: omp_section_loop.header17: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK9: omp_section_loop.cond18: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK9: omp_section_loop.body19: +// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK9-NEXT: switch i32 [[TMP15]], label 
[[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK9-NEXT: ] -// CHECK9: omp_section_loop.body.case23: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case23.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case23.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body.case25: +// CHECK9: omp_section_loop.body.case26: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case26.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK9: omp_section_loop.body.case26.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case26.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case28: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] 
= call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case25.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after26: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body16.sections.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK9: omp_section_loop.inc17: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK9: omp_section_loop.exit18: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK9: omp_section_loop.after19: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK9: omp_section_loop.after19sections.fini: +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case28.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI37]] +// CHECK9: omp_section_loop.body.case28.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK9: 
omp_section_loop.body.case28.sectionfini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case28.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body19.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK9: omp_section_loop.inc20: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK9: omp_section_loop.exit21: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK9: omp_section_loop.after22: +// CHECK9-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK9: section_finish36: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK9: .ompfinalize38: +// CHECK9-NEXT: br label [[SECTION_FINI37]] +// CHECK9: section_fini37: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // 
CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK9-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // 
CHECK9: cond.false: // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4048,29 +4090,23 @@ // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK9-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK9-NEXT: br 
i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: omp_section_loop.body.case.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK9: omp_section_loop.body.case23.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK9: omp_section_loop.body.case25.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: @@ -4081,30 +4117,30 @@ // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK9-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK9-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) // CHECK9-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: store i32 0, i32* [[R]], align 4 @@ -4128,50 +4164,58 @@ // CHECK9: omp.par.region: // CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK9: 3: -// CHECK9-NEXT: br label [[TMP4:%.*]] -// CHECK9: 4: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK9: omp.par.region1: +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label 
[[DOTCNCL5:%.*]] -// CHECK9: .cncl5: +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK9: omp.par.region1.cncl: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK9: .ompfinalize11: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK9: omp.par.region1.cncl.finisplit: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK9: .cont: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK9: omp.par.region1.cont: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP8]] +// CHECK9-NEXT: 
[[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK9-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK9: omp.par.region.parallel.after: // CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK9: omp.par.pre_finalize: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK9: .ompfinalize: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: 14: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK9: .cncl: +// CHECK9: omp.par.region.if: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK9-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK9: omp.par.region.if.cncl: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK9: .ompfinalize9: +// CHECK9-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK9: omp.par.region.if.cncl.finisplit: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: .split: -// CHECK9-NEXT: br label [[TMP4]] +// CHECK9: 
omp.par.region.if.cont: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4260,14 +4304,14 @@ // CHECK9-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4276,13 +4320,13 @@ // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: 
ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // @@ -4303,7 +4347,7 @@ // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4324,25 +4368,23 @@ // CHECK9-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: 
[[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.case2: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK9: .omp.sections.case2.split: -// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK9: .omp.sections.case2.section.after: +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK9: .cancel.exit4: +// CHECK9-NEXT: br label [[CANCEL_EXIT]] +// CHECK9: .cancel.continue5: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case2.cncl: -// CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4351,14 +4393,14 @@ // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 
[[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -4405,7 +4447,7 @@ // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -4433,7 +4475,7 @@ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK9-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -4455,14 +4497,14 @@ // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* 
@.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -4472,10 +4514,10 @@ // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.reduction.case2: @@ -4525,20 +4567,20 @@ // CHECK10-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK10-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK10-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -4561,8 +4603,8 @@ // CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK10-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -4582,11 +4624,13 @@ // CHECK10-NEXT: i32 0, 
label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: omp_section_loop.body.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.split: +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK10: omp_section_loop.body.case.cont: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -4596,97 +4640,109 @@ // CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK10: omp_section_loop.exit: -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: omp_section_loop.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.aftersections.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK10: omp_section_loop.preheader13: -// CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK10-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK10: section_finish: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK10: .ompfinalize15: +// CHECK10-NEXT: br label [[SECTION_FINI]] +// CHECK10: section_fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK10: omp_section_loop.preheader16: +// CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK10-NEXT: store i32 1, i32* [[P_STRIDE33]], align 4 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* 
[[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND31]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK10: omp_section_loop.header14: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK10: omp_section_loop.cond15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK10: omp_section_loop.body16: -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK10: omp_section_loop.header17: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK10: omp_section_loop.cond18: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK10: omp_section_loop.body19: +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK10-NEXT: i32 0, label 
[[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK10-NEXT: ] -// CHECK10: omp_section_loop.body.case23: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case23.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case23.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body.case25: +// CHECK10: omp_section_loop.body.case26: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case26.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK10: omp_section_loop.body.case26.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK10: 
omp_section_loop.body.case26.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case28: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case25.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after26: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body16.sections.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK10: omp_section_loop.inc17: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK10: omp_section_loop.exit18: -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK10: omp_section_loop.after19: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.after19sections.fini: +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label 
[[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case28.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI37]] +// CHECK10: omp_section_loop.body.case28.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK10: omp_section_loop.body.case28.sectionfini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case28.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body19.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK10: omp_section_loop.inc20: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK10: omp_section_loop.exit21: +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK10: omp_section_loop.after22: +// CHECK10-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK10: section_finish36: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK10: .ompfinalize38: +// CHECK10-NEXT: br label [[SECTION_FINI37]] +// CHECK10: section_fini37: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// 
CHECK10-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK10-NEXT: [[CMP43:%.*]] = icmp sgt i32 
[[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4700,29 +4756,23 @@ // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK10-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK10-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: omp_section_loop.body.case.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK10: omp_section_loop.body.case23.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK10: omp_section_loop.body.case25.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: @@ -4733,30 +4783,30 @@ // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK10-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK10-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK10-NEXT: br 
label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) // CHECK10-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: store i32 0, i32* [[R]], align 4 @@ -4780,50 +4830,58 @@ // CHECK10: omp.par.region: // CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK10: 3: -// CHECK10-NEXT: br label [[TMP4:%.*]] -// CHECK10: 4: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK10: omp.par.region1: +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK10-NEXT: br i1 [[TMP9]], label 
[[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK10: .cncl5: +// CHECK10-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK10-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK10: omp.par.region1.cncl: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK10: .ompfinalize11: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK10: omp.par.region1.cncl.finisplit: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK10: .cont: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK10: omp.par.region1.cont: +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK10-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK10-NEXT: [[ADD:%.*]] = add 
nsw i32 [[CONV7]], [[TMP8]] +// CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK10-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK10: omp.par.region.parallel.after: // CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK10: omp.par.pre_finalize: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK10: .ompfinalize: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: 14: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK10: .cncl: +// CHECK10: omp.par.region.if: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK10-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK10-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK10-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK10: omp.par.region.if.cncl: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK10: .ompfinalize9: +// CHECK10-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK10: omp.par.region.if.cncl.finisplit: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label 
[[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: .split: -// CHECK10-NEXT: br label [[TMP4]] +// CHECK10: omp.par.region.if.cont: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // @@ -4912,14 +4970,14 @@ // CHECK10-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -4928,13 +4986,13 @@ // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 
[[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // @@ -4955,7 +5013,7 @@ // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4976,25 +5034,23 @@ // CHECK10-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 
0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.case2: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK10-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK10: .omp.sections.case2.split: -// CHECK10-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK10: .omp.sections.case2.section.after: +// CHECK10-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK10: .cancel.exit4: +// CHECK10-NEXT: br label [[CANCEL_EXIT]] +// CHECK10: .cancel.continue5: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case2.cncl: -// CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5003,14 +5059,14 @@ // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: 
[[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // // @@ -5057,7 +5113,7 @@ // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -5085,7 +5141,7 @@ // CHECK10-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP13]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK10-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK10-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -5107,14 +5163,14 @@ // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK10-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK10-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: 
[[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -5124,10 +5180,10 @@ // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.reduction.case2: diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp --- a/clang/test/OpenMP/critical_codegen.cpp +++ b/clang/test/OpenMP/critical_codegen.cpp @@ -35,6 +35,8 @@ // ALL-NEXT: store i8 2, i8* [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* 
[[UNNAMED_LOCK]]) #pragma omp critical a = 2; @@ -44,6 +46,8 @@ // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) #pragma omp critical(the_name) foo(); @@ -53,6 +57,8 @@ // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]]) #pragma omp critical(the_name1) hint(23) foo(); diff --git a/clang/test/OpenMP/critical_codegen_attr.cpp b/clang/test/OpenMP/critical_codegen_attr.cpp --- a/clang/test/OpenMP/critical_codegen_attr.cpp +++ b/clang/test/OpenMP/critical_codegen_attr.cpp @@ -35,6 +35,8 @@ // ALL-NEXT: store i8 2, i8* [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) [[omp::directive(critical)]] a = 2; @@ -44,6 +46,8 @@ // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) [[omp::directive(critical(the_name))]] foo(); @@ -53,6 +57,8 @@ // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void 
@__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]]) [[omp::directive(critical(the_name1) hint(23))]] foo(); diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c --- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -16,8 +16,8 @@ // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: // ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*)) -// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT12:%.*]] -// ALL: omp.par.outlined.exit12: +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] +// ALL: omp.par.outlined.exit13: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // ALL: omp.par.exit.split: // ALL-NEXT: ret void @@ -33,7 +33,7 @@ // ALL-LABEL: @_Z17nested_parallel_1Pfid( // ALL-NEXT: entry: -// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { i32*, double*, float** }, align 8 +// ALL-NEXT: [[STRUCTARG15:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -43,15 +43,15 @@ // ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0 -// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8 -// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1 -// ALL-NEXT: store double* [[B_ADDR]], 
double** [[GEP_B_ADDR16]], align 8 -// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2 -// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8 -// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG14]]) -// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] -// ALL: omp.par.outlined.exit13: +// ALL-NEXT: [[GEP_A_ADDR16:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG15]], i32 0, i32 0 +// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR16]], align 8 +// ALL-NEXT: [[GEP_B_ADDR17:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG15]], i32 0, i32 1 +// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR17]], align 8 +// ALL-NEXT: [[GEP_R_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG15]], i32 0, i32 2 +// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR18]], align 8 +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG15]]) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT14:%.*]] +// ALL: omp.par.outlined.exit14: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // ALL: omp.par.exit.split: // ALL-NEXT: ret void @@ -85,17 +85,17 @@ // ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 // ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]) -// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]] -// ALL: omp.par.outlined.exit55: +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT58:%.*]] +// ALL: omp.par.outlined.exit58: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // ALL: omp.par.exit.split: // ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// ALL-NEXT: [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double +// ALL-NEXT: [[CONV59:%.*]] = sitofp i32 [[TMP0]] to double // ALL-NEXT: [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8 -// ALL-NEXT: [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]] -// ALL-NEXT: [[CONV58:%.*]] = fptrunc double [[ADD57]] to float +// ALL-NEXT: [[ADD60:%.*]] = fadd double [[CONV59]], [[TMP1]] +// ALL-NEXT: [[CONV61:%.*]] = fptrunc double [[ADD60]] to float // ALL-NEXT: [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8 -// ALL-NEXT: store float [[CONV58]], float* [[TMP2]], align 4 +// ALL-NEXT: store float [[CONV61]], float* [[TMP2]], align 4 // ALL-NEXT: ret void // void nested_parallel_2(float *r, 
int a, double b) { diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -44,7 +44,7 @@ // CHECK-LABEL: @_Z14parallel_for_1Pfid( // CHECK-NEXT: entry: -// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: [[STRUCTARG18:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -54,46 +54,46 @@ // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 -// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 -// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 -// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]) -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] -// CHECK: omp.par.outlined.exit16: +// CHECK-NEXT: [[GEP_A_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR19]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 1 +// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR20]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR21:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 2 +// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR21]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG18]]) +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT17:%.*]] +// CHECK: omp.par.outlined.exit17: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK: omp.par.exit.split: // CHECK-NEXT: ret void // // CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid( // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG18:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), 
!dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG79:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit16: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR19]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR20]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR21:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR21]], align 8 +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG18]]), !dbg [[DBG80:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT17:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit17: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG82:![0-9]+]] // void parallel_for_1(float *r, int a, double b) { #pragma omp parallel @@ -114,14 +114,14 @@ // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[I185:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I188:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED189:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED190:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR191:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER206:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND207:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND208:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE209:%.*]] = alloca i32, align 4 // CHECK-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 // CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // 
CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 @@ -135,57 +135,57 @@ // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]) -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] -// CHECK: omp.par.outlined.exit184: +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT187:%.*]] +// CHECK: omp.par.outlined.exit187: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK: omp.par.exit.split: -// CHECK-NEXT: store i32 0, i32* [[I185]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0 -// CHECK-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4 +// CHECK-NEXT: store i32 0, i32* [[I188]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED189]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I188]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED190]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I188]], align 4 // CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]) -// CHECK-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4 -// 
CHECK-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]] -// CHECK: omp_loop.preheader190: -// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1 -// CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4 -// CHECK-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0) -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4 +// CHECK-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR191]], %struct.anon.17* [[AGG_CAPTURED189]]) +// CHECK-NEXT: [[DOTCOUNT192:%.*]] = load i32, i32* [[DOTCOUNT_ADDR191]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER193:%.*]] +// CHECK: omp_loop.preheader193: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND207]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT192]], 1 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND208]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE209]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM210:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM210]], i32 34, i32* [[P_LASTITER206]], i32* [[P_LOWERBOUND207]], i32* [[P_UPPERBOUND208]], i32* [[P_STRIDE209]], i32 1, i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND207]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND208]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] // CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1 -// CHECK-NEXT: br 
label [[OMP_LOOP_HEADER191:%.*]] -// CHECK: omp_loop.header191: -// CHECK-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND192:%.*]] -// CHECK: omp_loop.cond192: -// CHECK-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]] -// CHECK: omp_loop.body193: -// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]] -// CHECK-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER194:%.*]] +// CHECK: omp_loop.header194: +// CHECK-NEXT: [[OMP_LOOP_IV200:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER193]] ], [ [[OMP_LOOP_NEXT202:%.*]], [[OMP_LOOP_INC197:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND195:%.*]] +// CHECK: omp_loop.cond195: +// CHECK-NEXT: [[OMP_LOOP_CMP201:%.*]] = icmp ult i32 [[OMP_LOOP_IV200]], [[TMP7]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP201]], label [[OMP_LOOP_BODY196:%.*]], label [[OMP_LOOP_EXIT198:%.*]] +// CHECK: omp_loop.body196: +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV200]], [[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.20(i32* [[I188]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED190]]) // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double +// CHECK-NEXT: [[CONV203:%.*]] = sitofp i32 [[TMP9]] to double // CHECK-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8 -// CHECK-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]] -// CHECK-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float +// CHECK-NEXT: [[ADD204:%.*]] = fadd double [[CONV203]], [[TMP10]] +// CHECK-NEXT: [[CONV205:%.*]] = fptrunc double [[ADD204]] to float // CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8 -// 
CHECK-NEXT: store float [[CONV202]], float* [[TMP11]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC194]] -// CHECK: omp_loop.inc194: -// CHECK-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER191]] -// CHECK: omp_loop.exit195: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER196:%.*]] -// CHECK: omp_loop.after196: +// CHECK-NEXT: store float [[CONV205]], float* [[TMP11]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC197]] +// CHECK: omp_loop.inc197: +// CHECK-NEXT: [[OMP_LOOP_NEXT202]] = add nuw i32 [[OMP_LOOP_IV200]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER194]] +// CHECK: omp_loop.exit198: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM210]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM211:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM211]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER199:%.*]] +// CHECK: omp_loop.after199: // CHECK-NEXT: ret void // // CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid( @@ -194,21 +194,21 @@ // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-DEBUG-NEXT: [[I185:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4 -// 
CHECK-DEBUG-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I188:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED189:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED190:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR191:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER206:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND207:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND208:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE209:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] +// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0 @@ -217,60 +217,60 @@ // CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit184: +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT187:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit187: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] 
-// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I188]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[I188]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED189]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32* [[I188]], i32** [[TMP0]], align 8, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED190]], i32 0, i32 0, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I188]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call 
void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR191]], %struct.anon.17* [[AGG_CAPTURED189]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT192:%.*]] = load i32, i32* [[DOTCOUNT_ADDR191]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER193:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.preheader193: +// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND207]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT192]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND208]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE209]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM210:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM210]], i32 34, i32* [[P_LASTITER206]], i32* [[P_LOWERBOUND207]], i32* [[P_UPPERBOUND208]], i32* [[P_STRIDE209]], i32 1, i32 0), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND207]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND208]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER194:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.header194: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV200:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER193]] ], [ [[OMP_LOOP_NEXT202:%.*]], [[OMP_LOOP_INC197:%.*]] ], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND195:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.cond195: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP201:%.*]] = icmp ult i32 [[OMP_LOOP_IV200]], [[TMP7]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP201]], label [[OMP_LOOP_BODY196:%.*]], 
label [[OMP_LOOP_EXIT198:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.body196: +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV200]], [[TMP4]], !dbg [[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I188]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED190]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV203:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[ADD204:%.*]] = fadd double [[CONV203]], [[TMP10]], !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV205:%.*]] = fptrunc double [[ADD204]] to float, !dbg [[DBG152]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV205]], float* [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC197]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.inc197: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT202]] = add nuw i32 [[OMP_LOOP_IV200]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER194]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.exit198: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM210]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM211:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM211]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER199:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.after199: +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] // void parallel_for_2(float *r, int a, double b) { #pragma omp parallel diff --git a/clang/test/OpenMP/masked_codegen.cpp 
b/clang/test/OpenMP/masked_codegen.cpp --- a/clang/test/OpenMP/masked_codegen.cpp +++ b/clang/test/OpenMP/masked_codegen.cpp @@ -35,6 +35,8 @@ // ALL-NEXT: store i8 2, i8* [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -35,6 +35,8 @@ // ALL-NEXT: store i8 2, i8* [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -1386,6 +1386,8 @@ // CHECK1-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1474,6 +1476,8 @@ // CHECK1-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br 
label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1610,6 +1614,8 @@ // CHECK1-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1717,6 +1723,8 @@ // CHECK1-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1803,6 +1811,8 @@ // CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1866,7 +1876,7 @@ // CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 
// CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33: // CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -1880,17 +1890,19 @@ // CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK1-IRBUILDER: omp.body.continue38: -// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK1-IRBUILDER: omp.inner.for.inc39: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK1-IRBUILDER: .ompfinalize38: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK1-IRBUILDER: omp.body.continue39: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc40: // CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 
[[TMP32]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK1-IRBUILDER: omp.inner.for.end42: +// CHECK1-IRBUILDER: omp.inner.for.end43: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1-IRBUILDER: omp.dispatch.inc: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -1902,19 +1914,19 @@ // CHECK1-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK1-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK1-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK1-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK1-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK1-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK1-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK1-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK1-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK1-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK1-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK1-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK1-IRBUILDER-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK1-IRBUILDER: .omp.final.done: // CHECK1-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK1-IRBUILDER: omp.precond.end: -// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK1-IRBUILDER-NEXT: ret void // // @@ -2014,6 +2026,8 @@ // CHECK2-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2102,6 +2116,8 @@ // CHECK2-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2238,6 +2254,8 @@ // CHECK2-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// 
CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2345,6 +2363,8 @@ // CHECK2-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2431,6 +2451,8 @@ // CHECK2-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2494,7 +2516,7 @@ // CHECK2-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK2-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK2-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body33: // CHECK2-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group 
!7 // CHECK2-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -2508,17 +2530,19 @@ // CHECK2-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK2-IRBUILDER: omp.body.continue38: -// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK2-IRBUILDER: omp.inner.for.inc39: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK2-IRBUILDER: .ompfinalize38: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK2-IRBUILDER: omp.body.continue39: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc40: // CHECK2-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK2-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK2-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK2-IRBUILDER: omp.inner.for.end42: 
+// CHECK2-IRBUILDER: omp.inner.for.end43: // CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2-IRBUILDER: omp.dispatch.inc: // CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -2530,19 +2554,19 @@ // CHECK2-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK2-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK2-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK2-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK2-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK2-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK2-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK2-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK2-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK2-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK2-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK2-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK2-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2-IRBUILDER: .omp.final.done: // CHECK2-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK2-IRBUILDER: omp.precond.end: -// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// 
CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK2-IRBUILDER-NEXT: ret void // // @@ -3836,6 +3860,8 @@ // CHECK3-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -3924,6 +3950,8 @@ // CHECK3-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -4060,6 +4088,8 @@ // CHECK3-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -4167,6 +4197,8 @@ // CHECK3-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: 
omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -4253,6 +4285,8 @@ // CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4316,7 +4350,7 @@ // CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK3-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33: // CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -4330,17 +4364,19 @@ // CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK3-IRBUILDER: omp.body.continue38: -// CHECK3-IRBUILDER-NEXT: br label 
[[OMP_INNER_FOR_INC39:%.*]] -// CHECK3-IRBUILDER: omp.inner.for.inc39: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK3-IRBUILDER: .ompfinalize38: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK3-IRBUILDER: omp.body.continue39: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc40: // CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK3-IRBUILDER: omp.inner.for.end42: +// CHECK3-IRBUILDER: omp.inner.for.end43: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3-IRBUILDER: omp.dispatch.inc: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -4352,19 +4388,19 @@ // CHECK3-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK3-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK3-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// 
CHECK3-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK3-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK3-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK3-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK3-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK3-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK3-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK3-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK3-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3-IRBUILDER: .omp.final.done: // CHECK3-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK3-IRBUILDER: omp.precond.end: -// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK3-IRBUILDER-NEXT: ret void // // @@ -4464,6 +4500,8 @@ // CHECK4-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: 
call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4552,6 +4590,8 @@ // CHECK4-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4688,6 +4728,8 @@ // CHECK4-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4795,6 +4837,8 @@ // CHECK4-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4881,6 +4925,8 @@ // CHECK4-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK4-IRBUILDER-NEXT: br label 
[[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4944,7 +4990,7 @@ // CHECK4-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK4-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK4-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body33: // CHECK4-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -4958,17 +5004,19 @@ // CHECK4-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK4-IRBUILDER: omp.body.continue38: -// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK4-IRBUILDER: omp.inner.for.inc39: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK4-IRBUILDER: .ompfinalize38: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK4-IRBUILDER: omp.body.continue39: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc40: // CHECK4-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// 
CHECK4-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK4-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK4-IRBUILDER: omp.inner.for.end42: +// CHECK4-IRBUILDER: omp.inner.for.end43: // CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4-IRBUILDER: omp.dispatch.inc: // CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -4980,19 +5028,19 @@ // CHECK4-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK4-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK4-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK4-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK4-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK4-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK4-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK4-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK4-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK4-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK4-IRBUILDER-NEXT: 
[[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK4-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK4-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK4-IRBUILDER: .omp.final.done: // CHECK4-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK4-IRBUILDER: omp.precond.end: -// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK4-IRBUILDER-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -765,8 +765,12 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4 // CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void @@ -830,8 +834,12 @@ 
// CHECK3-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void @@ -903,9 +911,13 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG35]] // CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]] // CHECK4-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG35]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG35]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -971,9 +983,13 @@ // CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG66]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG67:![0-9]+]] 
+// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG67]] +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -76,7 +76,7 @@ public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize). - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M); ~OpenMPIRBuilder(); /// Initialize the internal state, this will put structures types and @@ -103,12 +103,51 @@ /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using FinalizeCallbackTy = std::function; + using FinalizeCallbackTy = function_ref; - struct FinalizationInfo { - /// The finalization callback provided by the last in-flight invocation of - /// createXXXX for the directive of kind DK. - FinalizeCallbackTy FiniCB; +private: + enum class RegionKind { + /// Sentinel object so we don't always have to check whether the stack is + /// empty. + Function, + + /// Actions on loop-associated directives are deferred until all applyXYZ + /// actions have been applied to them. + CanonicalLoop, + + /// Non-loop OpenMP regions. + Directive + }; + + struct OMPRegionInfo; + + /// An irregular exit out of a region, such as by cancellation. 
+ struct OMPRegionBreakInfo { + /// The end of this basic block is current end of the path for breaking out + /// of the region. Must have no terminator so finalizations (e.g. + /// destructors) can be appended until rejoining at the end of the target + /// region. + BasicBlock *BB; + + /// What triggered the break out of a region, such as a cancellation point. + omp::Directive Reason; + + /// The kind of region that is being exited. Control flow will rejoin after + /// the innermost region of this kind. + OMPRegionInfo *Target; + + OMPRegionBreakInfo(BasicBlock *BB, omp::Directive Reason, + OMPRegionInfo *Target); + + /// Consistency self-check. + void assertOK() const; + }; + + /// An OpenMP region with a single entry and single exit (unless containing an + /// irregular exit) that may be associated with a construct. + struct OMPRegionInfo { + /// The kind of region: topmost sentinel, loop, or directive. + RegionKind Kind; /// The directive kind of the innermost directive that has an associated /// region which might require finalization when it is left. @@ -116,20 +155,45 @@ /// Flag to indicate if the directive is cancellable. bool IsCancellable; + + /// Irregular exits (such as cancellation points) out of this region. + SmallVector Breaks; + + OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable); + + /// Register an irregular exit to this region. + void addBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo *Target); + + /// Consistency self-check. + void assertOK() const; }; - /// Push a finalization callback on the finalization stack. - /// - /// NOTE: Temporary solution until Clang CG is gone. - void pushFinalizationCB(const FinalizationInfo &FI) { - FinalizationStack.push_back(FI); + /// The stack of regions surrounding the current in-progress code generation + /// location. Regions are pushed and popped when entering/leaving a region. 
+ /// Constructs/directives that are sensitive to surrounding regions (such as + /// cancellation) must be emitted inside the BodyGenCallbackTy of the + /// surrounding constructs. + SmallVector, 8> RegionStack; + + /// Return the innermost surrounding region of a specific directive kind, or + /// the toplevel region if not present. + OMPRegionInfo *getInnermostRegion(omp::Directive DK); + + /// @{ + /// Push a new region to the region stack. Must eventually be popped again + /// using exitRegion. + OMPRegionInfo *enterRegion(RegionKind Kind, omp::Directive DK, + bool IsCancellable); + OMPRegionInfo *enterRegion(omp::Directive DK, bool IsCancellable) { + return enterRegion(RegionKind::Directive, DK, IsCancellable); } + /// @} - /// Pop the last finalization callback from the finalization stack. - /// - /// NOTE: Temporary solution until Clang CG is gone. - void popFinalizationCB() { FinalizationStack.pop_back(); } + /// Pop a region from the region stack. Not yet rejoined irregular exits fall + /// through the outer surrounding region. + void exitRegion(OMPRegionInfo *R); +public: /// Callback type for body (=inner region) code generation /// /// The callback takes code locations as arguments, each describing a @@ -230,11 +294,11 @@ /// /// \param Loc The location where the directive was encountered. /// \param IfCondition The evaluated 'if' clause expression, if any. - /// \param CanceledDirective The kind of directive that is cancled. + /// \param CancelledDirective The kind of directive that is cancelled. /// /// \returns The insertion point after the barrier. InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, - omp::Directive CanceledDirective); + omp::Directive CancelledDirective); /// Generator for '#omp parallel' /// @@ -802,12 +866,14 @@ /// Generate control flow and cleanup for cancellation. /// + /// \param Loc Source location used for debug info and ident_t. 
/// \param CancelFlag Flag indicating if the cancellation is performed. - /// \param CanceledDirective The kind of directive that is cancled. + /// \param CancelledDirective The kind of directive that is cancelled. + /// \param CancelReason Cause of the irregular exit. /// \param ExitCB Extra code to be generated in the exit block. - void emitCancelationCheckImpl(Value *CancelFlag, - omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB = {}); + void emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, + omp::Directive CancelledDirective, + omp::Directive CancelReason); /// Generate a barrier runtime call. /// @@ -827,19 +893,12 @@ /// \param Loc The location at which the request originated and is fulfilled. void emitFlush(const LocationDescription &Loc); - /// The finalization stack made up of finalize callbacks currently in-flight, - /// wrapped into FinalizationInfo objects that reference also the finalization - /// target block and the kind of cancellable directive. - SmallVector FinalizationStack; - +private: /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. - bool isLastFinalizationInfoCancellable(omp::Directive DK) { - return !FinalizationStack.empty() && - FinalizationStack.back().IsCancellable && - FinalizationStack.back().DK == DK; - } + bool isLastFinalizationInfoCancellable(omp::Directive DK); +public: /// Generate a taskwait runtime call. /// /// \param Loc The location at which the request originated and is fulfilled. @@ -1249,9 +1308,6 @@ /// \param Conditional indicate if the entry call result will be used /// to evaluate a conditional of whether a thread will execute /// body code or not. - /// \param HasFinalize indicate if the directive will require finalization - /// and has a finalization callback in the stack that - /// should be called. /// \param IsCancellable if HasFinalize is set to true, indicate if the /// the directive should be cancellable.
/// \return The insertion point after the region @@ -1260,7 +1316,7 @@ EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional = false, - bool HasFinalize = true, bool IsCancellable = false); + bool IsCancellable = false); /// Get the platform-specific name separator. /// \param Parts different parts of the final name that needs separation diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -515,7 +515,15 @@ OutlineInfos = std::move(DeferredOutlines); } +OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { + RegionStack.emplace_back(new OMPRegionInfo( + RegionKind::Function, omp::OMPD_unknown, /*IsCancellable*/ false)); +} + OpenMPIRBuilder::~OpenMPIRBuilder() { + assert(RegionStack.size() == 1 && + RegionStack.back()->Kind == RegionKind::Function && + "OMPRegion push/pop must be balanced"); assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } @@ -644,6 +652,137 @@ "omp_global_thread_num"); } +OpenMPIRBuilder::OMPRegionBreakInfo::OMPRegionBreakInfo(BasicBlock *BB, + omp::Directive Reason, + OMPRegionInfo *Target) + : BB(BB), Reason(Reason), Target(Target) { + assertOK(); +} + +void OpenMPIRBuilder::OMPRegionBreakInfo::assertOK() const { +#ifndef NDEBUG + assert(!BB->getTerminator() && "Pending irregular exit must be amendable"); + + assert(Target && "Irregular exit requires a target"); + switch (Target->DK) { + case OMPD_parallel: + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_barrier: + break; + default: + llvm_unreachable("Unexpected region break reason for parallel construct"); + } + break; + case OMPD_sections: + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_cancel: + break; + default: + 
llvm_unreachable("Unexpected region break reason for sections construct"); + } + break; + default: + llvm_unreachable("unexpected region break target"); + } +#endif +} + +OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, + omp::Directive DK, + bool IsCancellable) + : Kind(Kind), DK(DK), IsCancellable(IsCancellable) { + assertOK(); +} + +void OpenMPIRBuilder::OMPRegionInfo::addBreak(BasicBlock *BB, + omp::Directive Reason, + OMPRegionInfo *Target) { + // TODO: Multiple breaks with same Target/Reasons can be combined. + + assert(IsCancellable && "Only cancellable region may have irregular exits"); + assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); + Breaks.emplace_back(BB, Reason, Target); + assertOK(); +} + +void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { +#ifndef NDEBUG + switch (Kind) { + case RegionKind::Function: + assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); + assert(!IsCancellable && "top-level is not cancellable"); + assert(Breaks.empty() && "Topmost region cannot have irregular exits"); + break; + case RegionKind::CanonicalLoop: + // TODO + break; + case RegionKind::Directive: + switch (DK) { + case OMPD_parallel: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_masked: + case OMPD_critical: + case OMPD_ordered: + break; + default: + llvm_unreachable("Not a recognized OpenMP construct with SESE region"); + } + break; + } + + for (const OMPRegionBreakInfo &Break : Breaks) + Break.assertOK(); +#endif +} + +OpenMPIRBuilder::OMPRegionInfo * +OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { + for (const std::unique_ptr &R : reverse(RegionStack)) { + if (R->Kind == RegionKind::Directive && R->DK == DK) + return R.get(); + } + return RegionStack.front().get(); +} + +bool OpenMPIRBuilder::isLastFinalizationInfoCancellable(omp::Directive DK) { + return getInnermostRegion(DK)->IsCancellable; +} + +OpenMPIRBuilder::OMPRegionInfo * 
+OpenMPIRBuilder::enterRegion(OpenMPIRBuilder::RegionKind Kind, + omp::Directive DK, bool IsCancellable) { + RegionStack.emplace_back(new OMPRegionInfo(Kind, DK, IsCancellable)); + return RegionStack.back().get(); +} + +void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R) { + assert( + RegionStack.size() >= 2 && + "Expect at least two regions on the stack: toplevel and the one exiting"); + assert(RegionStack.back().get() == R && "balanced region push/pop required"); + + // Trickle down not yet handled breaks to the next outer region. + OMPRegionInfo *Innermost = RegionStack.back().get(); + OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); + for (OMPRegionBreakInfo &Break : Innermost->Breaks) { + if (Break.Target == R) { + assert(!Break.BB && + "Irregular exit must have been handled by this region"); + } else { + NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); + } + } + + RegionStack.pop_back(); + NewInnermost->assertOK(); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, bool ForceSimpleCall, bool CheckCancelFlag) { @@ -696,7 +835,9 @@ Args); if (UseCancelBarrier && CheckCancelFlag) - emitCancelationCheckImpl(Result, OMPD_parallel); + emitCancelationCheckImpl(Loc, Result, + /* CancelledDirective */ OMPD_parallel, + /* CancelReason */ OMPD_barrier); return Builder.saveIP(); } @@ -704,20 +845,31 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *IfCondition, - omp::Directive CanceledDirective) { + omp::Directive CancelledDirective) { if (!updateToLocation(Loc)) return Loc.IP; - // LLVM utilities like blocks with terminators. - auto *UI = Builder.CreateUnreachable(); - - Instruction *ThenTI = UI, *ElseTI = nullptr; - if (IfCondition) - SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); - Builder.SetInsertPoint(ThenTI); + // Create condition for cancel if necessary.
+ BasicBlock *ContBB = nullptr; + if (IfCondition) { + // EntryBB + // | | + // | ThenBB (".if") + // | | + // ContBB + BasicBlock *EntryBB = Builder.GetInsertBlock(); + ContBB = splitBB(Builder, /*CreateBranch*/ false); + BasicBlock *ThenBB = + BasicBlock::Create(Builder.getContext(), EntryBB->getName() + ".if", + ContBB->getParent(), ContBB); + Builder.CreateCondBr(IfCondition, ThenBB, ContBB); + Builder.SetInsertPoint(ThenBB); + Builder.CreateBr(ContBB); + Builder.SetInsertPoint(ThenBB->getTerminator()); + } Value *CancelKind = nullptr; - switch (CanceledDirective) { + switch (CancelledDirective) { #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ case DirectiveEnum: \ CancelKind = Builder.getInt32(Value); \ @@ -733,23 +885,13 @@ Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); - auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { - if (CanceledDirective == OMPD_parallel) { - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); - } - }; // The actual cancel logic is shared with others, e.g., cancel_barriers. - emitCancelationCheckImpl(Result, CanceledDirective, ExitCB); - - // Update the insertion point and remove the terminator we introduced. 
- Builder.SetInsertPoint(UI->getParent()); - UI->eraseFromParent(); + emitCancelationCheckImpl(Loc, Result, CancelledDirective, + /* CancelReason */ OMPD_cancel); + if (ContBB) + return {ContBB, ContBB->begin()}; return Builder.saveIP(); } @@ -791,40 +933,40 @@ Entry->setAlignment(Align(1)); } -void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, - omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB) { - assert(isLastFinalizationInfoCancellable(CanceledDirective) && +void OpenMPIRBuilder::emitCancelationCheckImpl( + LocationDescription Loc, Value *CancelFlag, + omp::Directive CancelledDirective, omp::Directive CancelReason) { + assert(isLastFinalizationInfoCancellable(CancelledDirective) && "Unexpected cancellation!"); // For a cancel barrier we create two new blocks. BasicBlock *BB = Builder.GetInsertBlock(); - BasicBlock *NonCancellationBlock; - if (Builder.GetInsertPoint() == BB->end()) { - // TODO: This branch will not be needed once we moved to the - // OpenMPIRBuilder codegen completely. - NonCancellationBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".cont", BB->getParent()); - } else { - NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); - BB->getTerminator()->eraseFromParent(); - Builder.SetInsertPoint(BB); - } + LLVMContext &Ctx = BB->getContext(); + + // Building the following control flow: + // + // BB: + // br i1 CancelFlag + // | | + // | PreCancellationBlock (".cncl.fini") + // | | + // | CancellationBlock (".cncl") + // | | + // NonCancellationBlock (".cont") + + BasicBlock *NonCancellationBlock = + splitBBWithSuffix(Builder, /* CreateBranch */ false, ".cont"); BasicBlock *CancellationBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".cncl", BB->getParent()); + Ctx, BB->getName() + ".cncl", BB->getParent(), NonCancellationBlock); // Jump to them based on the return value.
Value *Cmp = Builder.CreateIsNull(CancelFlag); Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, /* TODO weight */ nullptr, nullptr); - // From the cancellation block we finalize all variables and go to the - // post finalization block that is known to the FiniCB callback. - Builder.SetInsertPoint(CancellationBlock); - if (ExitCB) - ExitCB(Builder.saveIP()); - auto &FI = FinalizationStack.back(); - FI.FiniCB(Builder.saveIP()); + // Register an irregular exit to be handled by the surrounding construct. + RegionStack.back()->addBreak(CancellationBlock, CancelReason, + getInnermostRegion(CancelledDirective)); // The continuation block is where code generation continues. Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); @@ -909,22 +1051,7 @@ BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); - auto FiniCBWrapper = [&](InsertPointTy IP) { - // Hide "open-ended" blocks from the given FiniCB by setting the right jump - // target to the region exit block. - if (IP.getBlock()->end() == IP.getPoint()) { - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - Instruction *I = Builder.CreateBr(PRegExitBB); - IP = InsertPointTy(I->getParent(), I->getIterator()); - } - assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && - IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && - "Unexpected insertion point for finalization call!"); - return FiniCB(IP); - }; - - FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); + OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable); // Generate the privatization allocas in the block that will become the entry // of the outlined function. 
@@ -1055,18 +1182,47 @@ I->eraseFromParent(); }; - // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini - // block and the exit block if we left the parallel "the normal way". - auto FiniInfo = FinalizationStack.pop_back_val(); - (void)FiniInfo; - assert(FiniInfo.DK == OMPD_parallel && - "Unexpected finalization stack state!"); - Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); - InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); - FiniCB(PreFiniIP); + // Emit frontend finializations (eg. destructors) at the end of the regular + // exit. + if (FiniCB) { + InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); + FiniCB(PreFiniIP); + } + + // Also emit finializations to each irregular exit. + // Note that we cannot reuse the regular exit finialization code (like + // EmitOMPInlinedRegion and createSections) because some of the cancellation + // may need an additional barrier. + for (OMPRegionBreakInfo &Break : ParallelRegion->Breaks) { + Builder.SetInsertPoint(Break.BB); + + if (FiniCB) { + BasicBlock *AfterFini = splitBBWithSuffix(Builder, true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + Builder.SetInsertPoint(AfterFini); + } + + // Unless cancellation has been detected by a barrier itself, need to + // synchronize between threads (after finalization). + if (Break.Reason != OMPD_barrier) { + Builder.restoreIP(emitBarrierImpl(Loc, Break.Reason, + /*ForceSimpleCall*/ false, + /*CheckCancelFlag*/ false)); + } + + // If the break was targeting this parallel region, rejoin after it. 
+ if (Break.Target == ParallelRegion) { + Builder.CreateBr(PRegExitBB); + Builder.ClearInsertionPoint(); + } + + Break.BB = Builder.GetInsertBlock(); + assert(!Break.BB || !Break.BB->getTerminator()); + } + + exitRegion(ParallelRegion); OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1262,27 +1418,7 @@ if (!updateToLocation(Loc)) return Loc.IP; - auto FiniCBWrapper = [&](InsertPointTy IP) { - if (IP.getBlock()->end() != IP.getPoint()) - return FiniCB(IP); - // This must be done otherwise any nested constructs using FinalizeOMPRegion - // will fail because that function requires the Finalization Basic Block to - // have a terminator, which is already removed by EmitOMPRegionBody. - // IP is currently at cancelation block. - // We need to backtrack to the condition block to fetch - // the exit block and create a branch from cancelation - // to exit block. - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - auto *CaseBB = IP.getBlock()->getSinglePredecessor(); - auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); - auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); - Instruction *I = Builder.CreateBr(ExitBB); - IP = InsertPointTy(I->getParent(), I->getIterator()); - return FiniCB(IP); - }; - - FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); + OMPRegionInfo *SectionsRegion = enterRegion(OMPD_sections, IsCancellable); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1332,19 +1468,30 @@ InsertPointTy AfterIP = applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); - // Apply the finalization callback in LoopAfterBB - auto FiniInfo = FinalizationStack.pop_back_val(); - assert(FiniInfo.DK == OMPD_sections && - "Unexpected finalization stack state!"); - if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) { - Builder.restoreIP(AfterIP); - BasicBlock *FiniBB = - splitBBWithSuffix(Builder, 
/*CreateBranch=*/true, "sections.fini"); - CB(Builder.saveIP()); - AfterIP = {FiniBB, FiniBB->begin()}; + Builder.restoreIP(AfterIP); + BasicBlock *Finish = splitBB(Builder, true, "section_finish"); + if (FiniCB) { + Builder.SetInsertPoint(Finish); + Finish = splitBB(Builder, true, "section_fini"); + FiniCB(Builder.saveAndClearIP()); } - return AfterIP; + for (OMPRegionBreakInfo &Break : SectionsRegion->Breaks) { + if (Break.Target == SectionsRegion) { + Builder.SetInsertPoint(Break.BB); + Builder.CreateBr(Finish); + Break.BB = nullptr; + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Break.BB = + splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } + } + + exitRegion(SectionsRegion); + + return {Finish, Finish->begin()}; } OpenMPIRBuilder::InsertPointTy @@ -1354,31 +1501,11 @@ if (!updateToLocation(Loc)) return Loc.IP; - auto FiniCBWrapper = [&](InsertPointTy IP) { - if (IP.getBlock()->end() != IP.getPoint()) - return FiniCB(IP); - // This must be done otherwise any nested constructs using FinalizeOMPRegion - // will fail because that function requires the Finalization Basic Block to - // have a terminator, which is already removed by EmitOMPRegionBody. - // IP is currently at cancelation block. - // We need to backtrack to the condition block to fetch - // the exit block and create a branch from cancelation - // to exit block. 
- IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - auto *CaseBB = Loc.IP.getBlock(); - auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); - auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); - Instruction *I = Builder.CreateBr(ExitBB); - IP = InsertPointTy(I->getParent(), I->getIterator()); - return FiniCB(IP); - }; - - Directive OMPD = Directive::OMPD_sections; + Directive OMPD = Directive::OMPD_section; // Since we are using Finalization Callback here, HasFinalize // and IsCancellable have to be true - return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper, - /*Conditional*/ false, /*hasFinalize*/ true, + return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, {}, + /*Conditional*/ false, /*IsCancellable*/ true); } @@ -1579,7 +1706,7 @@ Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, /*hasFinalize*/ true); + /*Conditional*/ true); } OpenMPIRBuilder::InsertPointTy @@ -1604,7 +1731,7 @@ Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd); return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, /*hasFinalize*/ true); + /*Conditional*/ true); } CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( @@ -1693,10 +1820,15 @@ Builder.CreateBr(CL->getPreheader()); } + OMPRegionInfo *LoopRegion = enterRegion(RegionKind::CanonicalLoop, + OMPD_unknown, /*IsCancellable*/ true); + // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. 
BodyGenCB(CL->getBodyIP(), CL->getIndVar()); + + exitRegion(LoopRegion); + #ifndef NDEBUG CL->assertOK(); #endif @@ -3001,8 +3133,7 @@ // __kmpc_barrier EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, - /*hasFinalize*/ true); + /*Conditional*/ true); if (!IsNowait) createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, @@ -3041,7 +3172,7 @@ Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ false, /*hasFinalize*/ true); + /*Conditional*/ false); } OpenMPIRBuilder::InsertPointTy @@ -3116,16 +3247,15 @@ } return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ false, /*hasFinalize*/ true); + /*Conditional*/ false); } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, - bool HasFinalize, bool IsCancellable) { + bool IsCancellable) { - if (HasFinalize) - FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); + OMPRegionInfo *Region = enterRegion(OMPD, IsCancellable); // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3144,12 +3274,62 @@ BodyGenCB(/* AllocaIP */ InsertPointTy(), /* CodeGenIP */ Builder.saveIP()); + // Exits are handled the following way: + // + // 1. For the regular region exit, \p FiniCB is used by the caller to emit + // finalization code somewhere on the control path exiting the region. + // exitRegion itself does nothing. + // + // 2. For irregular region exits that rejoin with the control flow after + // this region, exitRegion emits a branch to FiniBB containing the + // finalization code. This is typically the same code as for case 1 + // avoiding emitting the same finalization code multiple times.
+ // + // 3. For irregular region exits that rejoin a surrounding region, exitRegion + // calls FiniCB to insert the finalization code into the exiting control + // path. The irregular exit is then added as an irregular exit of the + // surrounding loop that, upon its exit, can add its own finalization + // code and/or rejoin the control flow there. + // + // TODO: Clang's codegen emits finalization code only once and inserts a + // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). + // Currently in the OpenMPIRBuilder, we emit the finalization multiple times + // for each path exiting the region (non-cancellation and each cancellation + // check). + + BasicBlock *FiniStartBB = FiniBB; + if (FiniCB) { + Builder.SetInsertPoint(FiniBB, FiniBB->begin()); + FiniBB = splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } + // emit exit call and do any needed finalization. auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && FiniBB->getTerminator()->getSuccessor(0) == ExitBB && "Unexpected control flow graph state!!"); - emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + emitCommonDirectiveExit(OMPD, FinIP, ExitCall); + + for (OMPRegionBreakInfo &Break : Region->Breaks) { + if (Break.Target == Region) { + Builder.SetInsertPoint(Break.BB); + Builder.CreateBr(FiniStartBB); + Break.BB = nullptr; + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Break.BB = + splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } + } + + exitRegion(Region); + + // FIXME: Only added to not break tests.
+ if (FiniStartBB != FiniBB) + MergeBlockIntoPredecessor(FiniStartBB); + assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && "Unexpected Control Flow State!"); MergeBlockIntoPredecessor(FiniBB); @@ -3204,23 +3384,6 @@ Builder.restoreIP(FinIP); - // If there is finalization to do, emit it before the exit call - if (HasFinalize) { - assert(!FinalizationStack.empty() && - "Unexpected finalization stack state!"); - - FinalizationInfo Fi = FinalizationStack.pop_back_val(); - assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); - - Fi.FiniCB(FinIP); - - BasicBlock *FiniBB = FinIP.getBlock(); - Instruction *FiniBBTI = FiniBB->getTerminator(); - - // set Builder IP for call creation - Builder.SetInsertPoint(FiniBBTI); - } - if (!ExitCall) return Builder.saveIP(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -951,8 +951,6 @@ return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) {}; - /// Create a sequential execution region within a merged parallel region, /// encapsulated in a master construct with a barrier for synchronization. auto CreateSequentialRegion = [&](Function *OuterFn, @@ -983,7 +981,6 @@ assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); }; - auto FiniCB = [&](InsertPointTy CodeGenIP) {}; // Find outputs from the sequential region to outside users and // broadcast their values to them. 
@@ -1026,7 +1023,7 @@ OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); InsertPointTy SeqAfterIP = - OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); + OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, {}); OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); @@ -1101,7 +1098,7 @@ // Create the merged parallel region with default proc binding, to // avoid overriding binding settings, and without explicit cancellation. InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, + Loc, AllocaIP, BodyGenCB, PrivCB, {}, nullptr, nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); BranchInst::Create(AfterBB, AfterIP.getBlock()); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -355,201 +355,6 @@ EXPECT_FALSE(verifyModule(*M, &errs())); } -TEST_F(OpenMPIRBuilderTest, CreateCancel) { - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.initialize(); - - BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); - new UnreachableInst(Ctx, CBB); - auto FiniCB = [&](InsertPointTy IP) { - ASSERT_NE(IP.getBlock(), nullptr); - ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); - BranchInst::Create(CBB, IP.getBlock()); - }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); - - IRBuilder<> Builder(BB); - - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel); - Builder.restoreIP(NewIP); - EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 4U); - EXPECT_EQ(F->size(), 4U); - EXPECT_EQ(BB->size(), 4U); - - CallInst *GTID = dyn_cast(&BB->front()); - EXPECT_NE(GTID, nullptr); - EXPECT_EQ(GTID->arg_size(), 1U); - 
EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); - - CallInst *Cancel = dyn_cast(GTID->getNextNode()); - EXPECT_NE(Cancel, nullptr); - EXPECT_EQ(Cancel->arg_size(), 3U); - EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Cancel->getNumUses(), 1U); - Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); - EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); - CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); - EXPECT_NE(GTID1, nullptr); - EXPECT_EQ(GTID1->arg_size(), 1U); - EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); - CallInst *Barrier = dyn_cast(GTID1->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Barrier->getNumUses(), 0U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), - 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); - - EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); - - OMPBuilder.popFinalizationCB(); - - Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M, &errs())); -} - -TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - OpenMPIRBuilder 
OMPBuilder(*M); - OMPBuilder.initialize(); - - BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); - new UnreachableInst(Ctx, CBB); - auto FiniCB = [&](InsertPointTy IP) { - ASSERT_NE(IP.getBlock(), nullptr); - ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); - BranchInst::Create(CBB, IP.getBlock()); - }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); - - IRBuilder<> Builder(BB); - - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel); - Builder.restoreIP(NewIP); - EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 4U); - EXPECT_EQ(F->size(), 7U); - EXPECT_EQ(BB->size(), 1U); - ASSERT_TRUE(isa(BB->getTerminator())); - ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); - BB = BB->getTerminator()->getSuccessor(0); - EXPECT_EQ(BB->size(), 4U); - - CallInst *GTID = dyn_cast(&BB->front()); - EXPECT_NE(GTID, nullptr); - EXPECT_EQ(GTID->arg_size(), 1U); - EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); - - CallInst *Cancel = dyn_cast(GTID->getNextNode()); - EXPECT_NE(Cancel, nullptr); - EXPECT_EQ(Cancel->arg_size(), 3U); - EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Cancel->getNumUses(), 1U); - Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); - EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), - NewIP.getBlock()); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); - CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); - EXPECT_NE(GTID1, nullptr); - EXPECT_EQ(GTID1->arg_size(), 1U); - 
EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); - CallInst *Barrier = dyn_cast(GTID1->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Barrier->getNumUses(), 0U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), - 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); - - EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); - - OMPBuilder.popFinalizationCB(); - - Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M, &errs())); -} - -TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.initialize(); - - BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); - new UnreachableInst(Ctx, CBB); - auto FiniCB = [&](InsertPointTy IP) { - ASSERT_NE(IP.getBlock(), nullptr); - ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); - BranchInst::Create(CBB, IP.getBlock()); - }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); - - IRBuilder<> Builder(BB); - - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for); - Builder.restoreIP(NewIP); - EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 3U); - EXPECT_EQ(F->size(), 4U); - EXPECT_EQ(BB->size(), 4U); - - CallInst *GTID = dyn_cast(&BB->front()); - EXPECT_NE(GTID, nullptr); - EXPECT_EQ(GTID->arg_size(), 1U); - EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); - 
EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); - - CallInst *Barrier = dyn_cast(GTID->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Barrier->getNumUses(), 1U); - Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); - EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), - 1U); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), - CBB); - - EXPECT_EQ(cast(Barrier)->getArgOperand(1), GTID); - - OMPBuilder.popFinalizationCB(); - - Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M, &errs())); -} - TEST_F(OpenMPIRBuilderTest, DbgLoc) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); @@ -1108,25 +913,6 @@ OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); - - BasicBlock *ExitBB = nullptr; - for (const User *Usr : FakeDestructor->users()) { - const CallInst *CI = dyn_cast(Usr); - ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); - ASSERT_TRUE(isa(CI->getNextNode())); - ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); - if (ExitBB) - ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); - else - ExitBB = CI->getNextNode()->getSuccessor(0); - ASSERT_EQ(ExitBB->size(), 1U); - if (!isa(ExitBB->front())) { - ASSERT_TRUE(isa(ExitBB->front())); - ASSERT_EQ(cast(ExitBB->front()).getNumSuccessors(), 1U); - ASSERT_TRUE(isa( - cast(ExitBB->front()).getSuccessor(0)->front())); - } - } } TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { @@ -1176,13 +962,12 @@ ReplacementValue = &Inner; return CodeGenIP; }; - auto FiniCB = [](InsertPointTy) 
{}; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); IRBuilder<>::InsertPoint AfterIP = - OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, - nullptr, nullptr, OMP_PROC_BIND_default, false); + OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, {}, nullptr, + nullptr, OMP_PROC_BIND_default, false); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); @@ -3721,11 +3506,8 @@ return Builder.saveIP(); }; - // Do nothing in finalization. - auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; - InsertPointTy AfterIP = - OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, + OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); @@ -3977,16 +3759,12 @@ return Builder.saveIP(); }; - // Do nothing in finalization. - auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; - - Builder.restoreIP( - OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, - FiniCB, /* IfCondition */ nullptr, - /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false)); + Builder.restoreIP(OMPBuilder.createParallel( + Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, + /* NumThreads */ nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false)); InsertPointTy AfterIP = OMPBuilder.createParallel( - {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB, + {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); @@ -4075,7 +3853,6 @@ llvm::SmallVector SectionCBVector; llvm::SmallVector CaseBBs; - auto FiniCB = [&](InsertPointTy IP) {}; auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; SectionCBVector.push_back(SectionCB); @@ -4085,7 +3862,7 @@ 
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, false)); + PrivCB, {}, false, false)); Builder.CreateRetVoid(); // Required at the end of the function EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -4223,10 +4000,9 @@ auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy IP) {}; Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, true)); + PrivCB, {}, false, true)); Builder.CreateRetVoid(); // Required at the end of the function for (auto &Inst : instructions(*F)) { EXPECT_FALSE(isa(Inst) && diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -281,7 +281,6 @@ // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::Value *ifCond = nullptr; if (auto ifExprVar = opInst.if_expr_var()) @@ -299,7 +298,7 @@ findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( - ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, + ompLoc, allocaIP, bodyGenCB, privCB, {}, ifCond, numThreads, pbKind, isCancellable)); return bodyGenStatus; @@ -324,11 +323,10 @@ // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( - ompLoc, bodyGenCB, finiCB)); + ompLoc, bodyGenCB, {})); return success(); } @@ -352,7 +350,6 @@ // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); @@ -371,7 +368,7 @@ static_cast(criticalDeclareOp.hint_val())); } builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( - ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); + ompLoc, bodyGenCB, {}, criticalOp.name().getValueOr(""), hint)); return success(); } @@ -583,12 +580,11 @@ // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP( moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( - ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); + ompLoc, bodyGenCB, {}, !orderedRegionOp.simd())); return bodyGenStatus; } @@ -648,14 +644,12 @@ // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( - ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, - sectionsOp.nowait())); + ompLoc, allocaIP, sectionCBs, privCB, {}, false, sectionsOp.nowait())); return bodyGenStatus; } @@ -671,9 +665,8 @@ convertOmpOpRegions(singleOp.region(), "omp.single.region", builder, moduleTranslation, bodyGenStatus); }; - auto finiCB = [&](InsertPointTy codeGenIP) {}; builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( - ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); + ompLoc, bodyCB, {}, singleOp.nowait(), /*DidIt=*/nullptr)); return bodyGenStatus; }