diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1677,6 +1677,41 @@ } return OS.str().str(); } + +void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, Twine RegionName) { + CGBuilderTy &Builder = CGF.Builder; + Builder.restoreIP(CodeGenIP); + llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, + "." + RegionName + ".after"); + + { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); + CGF.EmitStmt(RegionBodyStmt); + } + + if (Builder.saveIP().isSet()) + Builder.CreateBr(FiniBB); +} + +void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( + CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, Twine RegionName) { + CGBuilderTy &Builder = CGF.Builder; + Builder.restoreIP(CodeGenIP); + llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, + "." 
+ RegionName + ".after"); + + { + OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); + CGF.EmitStmt(RegionBodyStmt); + } + + if (Builder.saveIP().isSet()) + Builder.CreateBr(FiniBB); +} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); @@ -1719,13 +1754,10 @@ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); - auto BodyGenCB = [ParallelRegionBodyStmt, - this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - llvm::BasicBlock &ContinuationBB) { - OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, - ContinuationBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, - CodeGenIP, ContinuationBB); + auto BodyGenCB = [&, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) { + OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( + *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); }; CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); @@ -3983,22 +4015,17 @@ if (CS) { for (const Stmt *SubStmt : CS->children()) { auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, - FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP, - FiniBB); + InsertPointTy CodeGenIP) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, SubStmt, AllocaIP, CodeGenIP, "section"); }; SectionCBVector.push_back(SectionCB); } } else { auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP, - FiniBB); + InsertPointTy CodeGenIP) { + 
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); }; SectionCBVector.push_back(SectionCB); } @@ -4051,11 +4078,9 @@ }; auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt, - CodeGenIP, FiniBB); + InsertPointTy CodeGenIP) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); }; LexicalScope Scope(*this, S.getSourceRange()); @@ -4134,11 +4159,9 @@ }; auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, - CodeGenIP, FiniBB); + InsertPointTy CodeGenIP) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); }; LexicalScope Scope(*this, S.getSourceRange()); @@ -4182,11 +4205,9 @@ }; auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt, - CodeGenIP, FiniBB); + InsertPointTy CodeGenIP) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); }; LexicalScope Scope(*this, S.getSourceRange()); @@ -4224,11 +4245,9 @@ }; auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, - CodeGenIP, 
FiniBB); + InsertPointTy CodeGenIP) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); }; LexicalScope Scope(*this, S.getSourceRange()); @@ -5564,10 +5583,13 @@ }; auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { + InsertPointTy CodeGenIP) { + Builder.restoreIP(CodeGenIP); + const CapturedStmt *CS = S.getInnermostCapturedStmt(); if (C) { + llvm::BasicBlock *FiniBB = splitBBWithSuffix( + Builder, /*CreateBranch=*/false, ".ordered.after"); llvm::SmallVector CapturedVars; GenerateOpenMPCapturedVars(*CS, CapturedVars); llvm::Function *OutlinedFn = @@ -5575,13 +5597,11 @@ assert(S.getBeginLoc().isValid() && "Outlined function call location must be valid."); ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc()); - OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB, + OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB, OutlinedFn, CapturedVars); } else { - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, - FiniBB); - OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(), - CodeGenIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered"); } }; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1791,26 +1791,17 @@ } /// Emit the body of an OMP region - /// \param CGF The Codegen function this belongs to - /// \param RegionBodyStmt The body statement for the OpenMP region being - /// generated - /// \param CodeGenIP Insertion point for generating the body code. 
- /// \param FiniBB The finalization basic block - static void EmitOMPRegionBody(CodeGenFunction &CGF, - const Stmt *RegionBodyStmt, - InsertPointTy CodeGenIP, - llvm::BasicBlock &FiniBB) { - llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); - if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator()) - CodeGenIPBBTI->eraseFromParent(); - - CGF.Builder.SetInsertPoint(CodeGenIPBB); - - CGF.EmitStmt(RegionBodyStmt); - - if (CGF.Builder.saveIP().isSet()) - CGF.Builder.CreateBr(&FiniBB); - } + /// \param CGF The Codegen function this belongs to + /// \param RegionBodyStmt The body statement for the OpenMP region being + /// generated + /// \param AllocaIP Where to insert alloca instructions + /// \param CodeGenIP Where to insert the region code + /// \param RegionName Name to be used for new blocks + static void EmitOMPInlinedRegionBody(CodeGenFunction &CGF, + const Stmt *RegionBodyStmt, + InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + Twine RegionName); static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP, llvm::BasicBlock &FiniBB, llvm::Function *Fn, @@ -1830,12 +1821,25 @@ CGF.Builder.CreateBr(&FiniBB); } + /// Emit the body of an OMP region that will be outlined in + /// OpenMPIRBuilder::finalize(). + /// \param CGF The Codegen function this belongs to + /// \param RegionBodyStmt The body statement for the OpenMP region being + /// generated + /// \param AllocaIP Where to insert alloca instructions + /// \param CodeGenIP Where to insert the region code + /// \param RegionName Name to be used for new blocks + static void EmitOMPOutlinedRegionBody(CodeGenFunction &CGF, + const Stmt *RegionBodyStmt, + InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + Twine RegionName); + /// RAII for preserving necessary info during Outlined region body codegen. 
class OutlinedRegionBodyRAII { llvm::AssertingVH OldAllocaIP; CodeGenFunction::JumpDest OldReturnBlock; - CGBuilderTy::InsertPoint IP; CodeGenFunction &CGF; public: @@ -1846,7 +1850,6 @@ "Must specify Insertion point for allocas of outlined function"); OldAllocaIP = CGF.AllocaInsertPt; CGF.AllocaInsertPt = &*AllocaIP.getPoint(); - IP = CGF.Builder.saveIP(); OldReturnBlock = CGF.ReturnBlock; CGF.ReturnBlock = CGF.getJumpDestInCurrentScope(&RetBB); @@ -1855,7 +1858,6 @@ ~OutlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; CGF.ReturnBlock = OldReturnBlock; - CGF.Builder.restoreIP(IP); } }; diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1325,6 +1325,14 @@ // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1341,14 +1349,6 @@ // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 
4 -// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: // CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 @@ -1382,9 +1382,20 @@ // CHECK3-NEXT: [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]] // CHECK3-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 1 // CHECK3-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [ +// CHECK3-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER:%.*]] [ // CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK3-NEXT: ] +// CHECK3: omp_section_loop.body.case: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC]] // CHECK3: omp_section_loop.inc: // CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] @@ -1394,125 +1405,126 @@ // 
CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTIONS_END:%.*]] -// CHECK3: omp_sections.end: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK3: omp_section_loop.aftersections.fini: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] // CHECK3: omp_section_loop.preheader13: -// CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND28]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] -// CHECK3-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK3-NEXT: 
[[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] // CHECK3: omp_section_loop.header14: // CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] // CHECK3: omp_section_loop.cond15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]] +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] // CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] // CHECK3: omp_section_loop.body16: -// CHECK3-NEXT: [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 1 -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0 -// CHECK3-NEXT: switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ // CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] // CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK3-NEXT: ] +// CHECK3: omp_section_loop.body.case23: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK3: 
omp_section_loop.body.case23.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case23.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case25: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case25.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after26: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body16.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] // CHECK3: omp_section_loop.inc17: // CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK3: omp_section_loop.exit18: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call 
void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK3: omp_section_loop.after19: -// CHECK3-NEXT: br label [[OMP_SECTIONS_END33:%.*]] -// CHECK3: omp_sections.end33: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK3: omp_section_loop.after19sections.fini: +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 
0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00 +// CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 +// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 // CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: 
.cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: omp_section_loop.body.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] // CHECK3: omp_section_loop.body.case.cncl: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK3: omp_section_loop.body.case23: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK3-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0 -// CHECK3-NEXT: br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case23.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: omp_section_loop.body.case23.cncl: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK3: omp_section_loop.body.case25: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3) -// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0 -// CHECK3-NEXT: br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case25.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: 
omp_section_loop.body.case25.cncl: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: @@ -1600,6 +1612,8 @@ // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] @@ -1780,6 +1794,8 @@ // CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 // CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // CHECK3: .omp.sections.case2.split: +// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK3: .omp.sections.case2.section.after: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case2.cncl: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] @@ -1961,6 +1977,14 @@ // CHECK4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1977,14 +2001,6 @@ // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: 
store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: // CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 @@ -2018,9 +2034,20 @@ // CHECK4-NEXT: [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]] // CHECK4-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 1 // CHECK4-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -// CHECK4-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [ +// CHECK4-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER:%.*]] [ // CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK4-NEXT: ] +// CHECK4: omp_section_loop.body.case: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] +// CHECK4: 
omp_section_loop.body.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC]] // CHECK4: omp_section_loop.inc: // CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] @@ -2030,125 +2057,126 @@ // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label [[OMP_SECTIONS_END:%.*]] -// CHECK4: omp_sections.end: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK4: omp_section_loop.aftersections.fini: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] // CHECK4: omp_section_loop.preheader13: -// CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND28]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] -// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +// CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], 
i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] // CHECK4: omp_section_loop.header14: // CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] // CHECK4: omp_section_loop.cond15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]] +// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] // CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] // CHECK4: omp_section_loop.body16: -// CHECK4-NEXT: [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]] -// CHECK4-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 1 -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0 -// CHECK4-NEXT: switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [ +// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 +// CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 +// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ // CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] // CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK4-NEXT: ] +// CHECK4: omp_section_loop.body.case23: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 
@__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case23.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case23.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case25: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 +// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case25.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK4: omp_section_loop.body.case25.section.after26: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case25.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body16.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] // CHECK4: omp_section_loop.inc17: // CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK4: omp_section_loop.exit18: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK4: omp_section_loop.after19: -// CHECK4-NEXT: br label [[OMP_SECTIONS_END33:%.*]] -// CHECK4: omp_sections.end33: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK4: omp_section_loop.after19sections.fini: +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// 
CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// 
CHECK4-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00 +// CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 +// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 // CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) -// CHECK4-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK4-NEXT: br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 
+// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK4-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: omp_section_loop.body.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) -// CHECK4-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0 -// CHECK4-NEXT: br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] // CHECK4: omp_section_loop.body.case.cncl: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK4: omp_section_loop.body.case23: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK4-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0 -// CHECK4-NEXT: br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case23.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: omp_section_loop.body.case23.cncl: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK4: omp_section_loop.body.case25: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3) -// CHECK4-NEXT: [[TMP34:%.*]] 
= icmp eq i32 [[TMP33]], 0 -// CHECK4-NEXT: br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case25.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: omp_section_loop.body.case25.cncl: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: @@ -2236,6 +2264,8 @@ // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] @@ -2416,6 +2446,8 @@ // CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 // CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // CHECK4: .omp.sections.case2.split: +// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK4: .omp.sections.case2.section.after: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.case2.cncl: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] @@ -3837,6 +3869,14 @@ // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK9-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 // 
CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3853,14 +3893,6 @@ // CHECK9-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK9-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK9-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK9: omp_parallel: // CHECK9-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 @@ -3894,9 +3926,20 @@ // CHECK9-NEXT: [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]] // CHECK9-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 1 // CHECK9-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -// CHECK9-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [ +// CHECK9-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER:%.*]] [ // CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK9-NEXT: ] +// CHECK9: omp_section_loop.body.case: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label 
[[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC]] // CHECK9: omp_section_loop.inc: // CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] @@ -3906,125 +3949,126 @@ // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label [[OMP_SECTIONS_END:%.*]] -// CHECK9: omp_sections.end: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK9: omp_section_loop.aftersections.fini: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] // CHECK9: omp_section_loop.preheader13: -// CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND28]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] -// CHECK9-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 +// CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], 
align 4 +// CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] // CHECK9: omp_section_loop.header14: // CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] // CHECK9: omp_section_loop.cond15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]] +// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] // CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] // CHECK9: omp_section_loop.body16: -// CHECK9-NEXT: [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]] -// CHECK9-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 1 -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0 -// CHECK9-NEXT: switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [ +// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 +// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ // CHECK9-NEXT: i32 0, label 
[[OMP_SECTION_LOOP_BODY_CASE23:%.*]] // CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK9-NEXT: ] +// CHECK9: omp_section_loop.body.case23: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case23.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case23.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case25: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case25.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK9: omp_section_loop.body.case25.section.after26: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case25.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body16.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] // CHECK9: omp_section_loop.inc17: // CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK9: 
omp_section_loop.exit18: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK9: omp_section_loop.after19: -// CHECK9-NEXT: br label [[OMP_SECTIONS_END33:%.*]] -// CHECK9: omp_sections.end33: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK9: omp_section_loop.after19sections.fini: +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = 
phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00 +// CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 +// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 // CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: omp_section_loop.body.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) -// CHECK9-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] // CHECK9: omp_section_loop.body.case.cncl: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK9: omp_section_loop.body.case23: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK9-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case23.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: omp_section_loop.body.case23.cncl: // CHECK9-NEXT: br label 
[[OMP_SECTION_LOOP_EXIT18]] -// CHECK9: omp_section_loop.body.case25: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3) -// CHECK9-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case25.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: omp_section_loop.body.case25.cncl: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: @@ -4112,6 +4156,8 @@ // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK9: omp.par.region.parallel.after: // CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK9: omp.par.pre_finalize: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] @@ -4292,6 +4338,8 @@ // CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 // CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // CHECK9: .omp.sections.case2.split: +// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK9: .omp.sections.case2.section.after: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK9: .omp.sections.case2.cncl: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] @@ -4473,6 +4521,14 @@ // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK10-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// 
CHECK10-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4489,14 +4545,6 @@ // CHECK10-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK10-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK10-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK10: omp_parallel: // CHECK10-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 @@ -4530,9 +4578,20 @@ // CHECK10-NEXT: [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]] // CHECK10-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 1 // CHECK10-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -// CHECK10-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [ +// CHECK10-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER:%.*]] [ // CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK10-NEXT: ] +// CHECK10: omp_section_loop.body.case: +// CHECK10-NEXT: 
[[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC]] // CHECK10: omp_section_loop.inc: // CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] @@ -4542,125 +4601,126 @@ // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: omp_section_loop.after: -// CHECK10-NEXT: br label [[OMP_SECTIONS_END:%.*]] -// CHECK10: omp_sections.end: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK10: omp_section_loop.aftersections.fini: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] // CHECK10: omp_section_loop.preheader13: -// CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND28]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, 
i32 0) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] -// CHECK10-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1 +// CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 +// CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 +// CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] // CHECK10: omp_section_loop.header14: // CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] // CHECK10: omp_section_loop.cond15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]] +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] // CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] // CHECK10: omp_section_loop.body16: -// CHECK10-NEXT: [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]] -// CHECK10-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 1 -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0 -// 
CHECK10-NEXT: switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [ +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 +// CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ // CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] // CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK10-NEXT: ] +// CHECK10: omp_section_loop.body.case23: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case23.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case23.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case25: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case25.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK10: omp_section_loop.body.case25.section.after26: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK10: 
omp_section_loop.body.case25.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body16.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] // CHECK10: omp_section_loop.inc17: // CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK10: omp_section_loop.exit18: -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK10: omp_section_loop.after19: -// CHECK10-NEXT: br label [[OMP_SECTIONS_END33:%.*]] -// CHECK10: omp_sections.end33: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK10: omp_section_loop.after19sections.fini: +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// 
CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 -// 
CHECK10-NEXT: [[TMP26:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00 +// CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 +// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 // CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) -// CHECK10-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK10-NEXT: br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: omp_section_loop.body.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) -// CHECK10-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0 -// CHECK10-NEXT: br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] // CHECK10: omp_section_loop.body.case.cncl: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK10: omp_section_loop.body.case23: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// 
CHECK10-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK10-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0 -// CHECK10-NEXT: br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case23.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: omp_section_loop.body.case23.cncl: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK10: omp_section_loop.body.case25: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3) -// CHECK10-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0 -// CHECK10-NEXT: br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case25.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: omp_section_loop.body.case25.cncl: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: @@ -4748,6 +4808,8 @@ // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK10: omp.par.region.parallel.after: // CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK10: omp.par.pre_finalize: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] @@ -4928,6 +4990,8 @@ // CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 // CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // CHECK10: .omp.sections.case2.split: +// CHECK10-NEXT: br label 
[[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK10: .omp.sections.case2.section.after: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK10: .omp.sections.case2.cncl: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp --- a/clang/test/OpenMP/critical_codegen.cpp +++ b/clang/test/OpenMP/critical_codegen.cpp @@ -33,6 +33,8 @@ // ALL: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) // ALL-NEXT: store i8 2, i8* [[A_ADDR]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) #pragma omp critical a = 2; @@ -40,6 +42,8 @@ // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) // IRBUILDER-NEXT: call {{.*}}void [[FOO]]() // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) #pragma omp critical(the_name) foo(); @@ -47,15 +51,17 @@ // ALL: call {{.*}}void @__kmpc_critical_with_hint([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]], i{{64|32}} 23) // IRBUILDER-NEXT: call {{.*}}void [[FOO]]() // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]]) #pragma omp critical(the_name1) hint(23) foo(); // IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) // ALL: call {{.*}}void 
@__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) - // ALL: br label - // ALL-NOT: call {{.*}}void @__kmpc_end_critical( - // ALL: br label - // ALL-NOT: call {{.*}}void @__kmpc_end_critical( + // NORMAL: br label + // NORMAL-NOT: call {{.*}}void @__kmpc_end_critical( + // NORMAL: br label + // NORMAL-NOT: call {{.*}}void @__kmpc_end_critical( // NORMAL: br label if (a) #pragma omp critical(the_name) diff --git a/clang/test/OpenMP/critical_codegen_attr.cpp b/clang/test/OpenMP/critical_codegen_attr.cpp --- a/clang/test/OpenMP/critical_codegen_attr.cpp +++ b/clang/test/OpenMP/critical_codegen_attr.cpp @@ -33,6 +33,8 @@ // ALL: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) // ALL-NEXT: store i8 2, i8* [[A_ADDR]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) [[omp::directive(critical)]] a = 2; @@ -40,6 +42,8 @@ // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) // IRBUILDER-NEXT: call {{.*}}void [[FOO]]() // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) [[omp::directive(critical(the_name))]] foo(); @@ -47,15 +51,17 @@ // ALL: call {{.*}}void @__kmpc_critical_with_hint([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]], i{{64|32}} 23) // IRBUILDER-NEXT: call {{.*}}void [[FOO]]() // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL: call {{.*}}void 
@__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]]) [[omp::directive(critical(the_name1) hint(23))]] foo(); // IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) - // ALL: br label - // ALL-NOT: call {{.*}}void @__kmpc_end_critical( - // ALL: br label - // ALL-NOT: call {{.*}}void @__kmpc_end_critical( + // NORMAL: br label + // NORMAL-NOT: call {{.*}}void @__kmpc_end_critical( + // NORMAL: br label + // NORMAL-NOT: call {{.*}}void @__kmpc_end_critical( // NORMAL: br label if (a) [[omp::directive(critical(the_name))]] diff --git a/clang/test/OpenMP/masked_codegen.cpp b/clang/test/OpenMP/masked_codegen.cpp --- a/clang/test/OpenMP/masked_codegen.cpp +++ b/clang/test/OpenMP/masked_codegen.cpp @@ -33,6 +33,8 @@ // ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL-NEXT: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -33,6 +33,8 @@ // ALL-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // ALL-NEXT: call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp --- a/clang/test/OpenMP/ordered_codegen.cpp +++ 
b/clang/test/OpenMP/ordered_codegen.cpp @@ -1,19 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-NORMAL +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-NORMAL +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-IRBUILDER +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1-IRBUILDER // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -x c++ -triple 
x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-IRBUILDER +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2-IRBUILDER -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-NORMAL +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3-IRBUILDER // RUN: %clang_cc1 -no-opaque-pointers 
-fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-IRBUILDER +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4-IRBUILDER // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -136,7 +136,7 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -145,12 +145,9 @@ // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-IRBUILDER-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) -// CHECK1-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -168,7 +165,6 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -200,7 +196,6 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -208,9 +203,7 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br 
label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -228,7 +221,7 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -237,11 +230,9 @@ // CHECK1-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 
[[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -260,7 +251,6 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -288,7 +278,6 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -296,9 +285,7 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -324,7 +311,7 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: 
store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -357,11 +344,9 @@ // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -392,7 +377,6 @@ // CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK1-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK1-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -424,7 +408,6 @@ // CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK1-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -434,9 +417,7 @@ // CHECK1: omp.dispatch.end: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -457,7 +438,7 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -467,11 +448,9 @@ // CHECK1-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK1-NEXT: 
[[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -499,7 +478,6 @@ // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -531,7 +509,6 @@ // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -539,9 +516,7 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -568,7 +543,7 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca 
i32, align 4 // CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK1-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -655,11 +630,9 @@ // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -667,15 +640,13 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK1-IRBUILDER: omp.inner.for.cond30: -// CHECK1-NORMAL: omp.inner.for.cond29: +// CHECK1: omp.inner.for.cond29: // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // 
CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK1-IRBUILDER: omp.inner.for.body33: -// CHECK1-NORMAL: omp.inner.for.body32: +// CHECK1: omp.inner.for.body32: // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -687,19 +658,15 @@ // CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 // CHECK1-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK1-IRBUILDER: omp.body.continue38: -// CHECK1-NORMAL: omp.body.continue37: +// CHECK1: omp.body.continue37: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK1-IRBUILDER: omp.inner.for.inc39: -// CHECK1-NORMAL: omp.inner.for.inc38: +// CHECK1: omp.inner.for.inc38: // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK1-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK1-IRBUILDER: omp.inner.for.end42: -// CHECK1-NORMAL: omp.inner.for.end40: +// CHECK1: omp.inner.for.end40: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] @@ -722,9 +689,7 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: br label 
[[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -768,7 +733,7 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -777,12 +742,9 @@ // CHECK2-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) -// CHECK2-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // 
CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -800,7 +762,6 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -832,7 +793,6 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -840,9 +800,7 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) -// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -860,7 +818,7 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK2-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -869,11 +827,9 @@ // CHECK2-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -892,7 +848,6 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ 
-920,7 +875,6 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -928,9 +882,7 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -956,7 +908,7 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -989,11 +941,9 @@ // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], 
i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1024,7 +974,6 @@ // CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK2-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK2-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -1056,7 +1005,6 @@ // CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK2-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -1066,9 +1014,7 @@ // CHECK2: omp.dispatch.end: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK2-NORMAL-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -1089,7 +1035,7 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1099,11 +1045,9 @@ // CHECK2-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1131,7 +1075,6 @@ // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK2-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -1163,7 +1106,6 @@ // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -1171,9 +1113,7 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -1200,7 +1140,7 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK2-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -1287,11 +1227,9 @@ // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, 
i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1299,15 +1237,13 @@ // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK2-IRBUILDER: omp.inner.for.cond30: -// CHECK2-NORMAL: omp.inner.for.cond29: +// CHECK2: omp.inner.for.cond29: // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK2-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK2-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK2-IRBUILDER: omp.inner.for.body33: -// CHECK2-NORMAL: omp.inner.for.body32: +// CHECK2: omp.inner.for.body32: // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[MUL33:%.*]] = 
mul i32 [[TMP31]], 1 @@ -1319,19 +1255,15 @@ // CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 // CHECK2-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK2-IRBUILDER: omp.body.continue38: -// CHECK2-NORMAL: omp.body.continue37: +// CHECK2: omp.body.continue37: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK2-IRBUILDER: omp.inner.for.inc39: -// CHECK2-NORMAL: omp.inner.for.inc38: +// CHECK2: omp.inner.for.inc38: // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK2-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK2-IRBUILDER: omp.inner.for.end42: -// CHECK2-NORMAL: omp.inner.for.end40: +// CHECK2: omp.inner.for.end40: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] @@ -1354,9 +1286,7 @@ // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -1386,6 +1316,1262 @@ // CHECK2-NEXT: ret void // // +// CHECK1-IRBUILDER-LABEL: define 
{{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ +// CHECK1-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK1-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.body: +// CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body: +// CHECK1-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 +// CHECK1-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] +// CHECK1-IRBUILDER-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK1-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
float, float* [[TMP5]], i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[IDXPROM3]] +// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM6]] +// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP13]] +// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM9]] +// CHECK1-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1-IRBUILDER: omp.body.continue: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc: +// 
CHECK1-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1-IRBUILDER: omp.inner.for.end: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.inc: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1-IRBUILDER: omp.dispatch.end: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK1-IRBUILDER-LABEL: define {{[^@]+}}@_Z8dynamic1PfS_S_S_ +// CHECK1-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: 
[[I:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK1-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK1-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.body: +// CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP3:%.*]] = load i64, i64* 
[[DOTOMP_UB]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], 1 +// CHECK1-IRBUILDER-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP2]], [[ADD]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body: +// CHECK1-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul i64 [[TMP4]], 127 +// CHECK1-IRBUILDER-NEXT: [[ADD2:%.*]] = add i64 131071, [[MUL]] +// CHECK1-IRBUILDER-NEXT: store i64 [[ADD2]], i64* [[I]], align 8 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK1-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP6:%.*]] = load i64, i64* [[I]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 [[TMP6]] +// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[I]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[TMP9]] +// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] +// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL7:%.*]] = fmul float [[MUL5]], 
[[TMP13]] +// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] +// CHECK1-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1-IRBUILDER: omp.body.continue: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc: +// CHECK1-IRBUILDER-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 +// CHECK1-IRBUILDER-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1-IRBUILDER: omp.inner.for.end: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.inc: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1-IRBUILDER: omp.dispatch.end: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK1-IRBUILDER-LABEL: define {{[^@]+}}@_Z9test_autoPfS_S_S_ +// CHECK1-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef 
[[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK1-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK1-IRBUILDER-NEXT: [[X6:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I8:%.*]] = alloca i8, align 1 +// CHECK1-IRBUILDER-NEXT: [[X9:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK1-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[TMP0]] to i8 +// CHECK1-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// 
CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-IRBUILDER-NEXT: [[CONV3:%.*]] = sext i8 [[TMP1]] to i32 +// CHECK1-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] +// CHECK1-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-IRBUILDER-NEXT: [[CONV4:%.*]] = zext i32 [[DIV]] to i64 +// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 +// CHECK1-IRBUILDER-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-IRBUILDER-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-IRBUILDER-NEXT: store i8 [[TMP2]], i8* [[I]], align 1 +// CHECK1-IRBUILDER-NEXT: store i32 11, i32* [[X6]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-IRBUILDER-NEXT: [[CONV7:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV7]], 57 +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1-IRBUILDER: omp.precond.then: +// CHECK1-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 [[TMP4]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741894, i64 0, i64 [[TMP5]], i64 1, i64 1) +// CHECK1-IRBUILDER-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK1-IRBUILDER-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK1-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.body: +// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-IRBUILDER-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-IRBUILDER-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP8]], [[TMP9]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body: +// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-IRBUILDER-NEXT: [[CONV12:%.*]] = sext i8 [[TMP10]] to i64 +// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[DIV13:%.*]] = sdiv i64 [[TMP11]], 11 +// CHECK1-IRBUILDER-NEXT: [[MUL14:%.*]] = mul nsw i64 [[DIV13]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD15:%.*]] = add nsw i64 [[CONV12]], [[MUL14]] +// CHECK1-IRBUILDER-NEXT: [[CONV16:%.*]] = trunc i64 [[ADD15]] to i8 +// CHECK1-IRBUILDER-NEXT: store i8 [[CONV16]], i8* [[I8]], align 1 +// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load 
i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP13]], 11 +// CHECK1-IRBUILDER-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 11 +// CHECK1-IRBUILDER-NEXT: [[SUB19:%.*]] = sub nsw i64 [[TMP12]], [[MUL18]] +// CHECK1-IRBUILDER-NEXT: [[MUL20:%.*]] = mul nsw i64 [[SUB19]], 1 +// CHECK1-IRBUILDER-NEXT: [[SUB21:%.*]] = sub nsw i64 11, [[MUL20]] +// CHECK1-IRBUILDER-NEXT: [[CONV22:%.*]] = trunc i64 [[SUB21]] to i32 +// CHECK1-IRBUILDER-NEXT: store i32 [[CONV22]], i32* [[X9]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP15]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP17:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP18:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP18]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM24]] +// CHECK1-IRBUILDER-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX25]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL26:%.*]] = fmul float [[TMP16]], [[TMP19]] +// CHECK1-IRBUILDER-NEXT: [[TMP20:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP21:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP21]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* 
[[TMP20]], i64 [[IDXPROM27]] +// CHECK1-IRBUILDER-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX28]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL29:%.*]] = fmul float [[MUL26]], [[TMP22]] +// CHECK1-IRBUILDER-NEXT: [[TMP23:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP24:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM30:%.*]] = sext i8 [[TMP24]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM30]] +// CHECK1-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1-IRBUILDER: omp.body.continue: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc: +// CHECK1-IRBUILDER-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[ADD32:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-IRBUILDER-NEXT: store i64 [[ADD32]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1-IRBUILDER: omp.inner.for.end: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.inc: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1-IRBUILDER: omp.dispatch.end: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1-IRBUILDER: omp.precond.end: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK1-IRBUILDER-LABEL: define {{[^@]+}}@_Z7runtimePfS_S_S_ +// CHECK1-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK1-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK1-IRBUILDER-NEXT: [[X2:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741893, i32 0, i32 199, i32 1, i32 1) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK1-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.body: +// CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body: +// CHECK1-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 20 +// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] +// CHECK1-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 +// 
CHECK1-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[I]], align 1 +// CHECK1-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP6]], 20 +// CHECK1-IRBUILDER-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 20 +// CHECK1-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], [[MUL5]] +// CHECK1-IRBUILDER-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD7:%.*]] = add nsw i32 -10, [[MUL6]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD7]], i32* [[X2]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i8, i8* [[I]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP7]], i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i8, i8* [[I]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[IDXPROM9]] +// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]] +// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to 
i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM12]] +// CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX13]], align 4 +// CHECK1-IRBUILDER-NEXT: [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]] +// CHECK1-IRBUILDER-NEXT: [[TMP16:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM15]] +// CHECK1-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1-IRBUILDER: omp.body.continue: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc: +// CHECK1-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM18:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM18]]) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1-IRBUILDER: omp.inner.for.end: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.inc: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1-IRBUILDER: omp.dispatch.end: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK1-IRBUILDER-LABEL: define {{[^@]+}}@_Z8foo_simdii +// CHECK1-IRBUILDER-SAME: (i32 noundef [[LOW:%.*]], i32 noundef [[UP:%.*]]) #[[ATTR0]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[LOW_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[UP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IV16:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[_TMP17:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_20:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I26:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: [[I28:%.*]] = alloca i32, align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// 
CHECK1-IRBUILDER-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 [[TMP2]], [[TMP3]] +// CHECK1-IRBUILDER-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-IRBUILDER-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP4]], i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-IRBUILDER-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[TMP6]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK1-IRBUILDER: simd.if.then: +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-IRBUILDER: 
omp.inner.for.body: +// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1-IRBUILDER: omp.body.continue: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc: +// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-IRBUILDER: omp.inner.for.end: +// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-IRBUILDER-NEXT: [[SUB10:%.*]] = sub i32 
[[TMP14]], [[TMP15]] +// CHECK1-IRBUILDER-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], 1 +// CHECK1-IRBUILDER-NEXT: [[MUL14:%.*]] = mul i32 [[DIV13]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD15:%.*]] = add i32 [[TMP13]], [[MUL14]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[SIMD_IF_END]] +// CHECK1-IRBUILDER: simd.if.end: +// CHECK1-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP17:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK1-IRBUILDER-NEXT: [[SUB21:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// CHECK1-IRBUILDER-NEXT: [[SUB22:%.*]] = sub i32 [[SUB21]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD23:%.*]] = add i32 [[SUB22]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV24:%.*]] = udiv i32 [[ADD23]], 1 +// CHECK1-IRBUILDER-NEXT: [[SUB25:%.*]] = sub i32 [[DIV24]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP20]], i32* [[I26]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-IRBUILDER-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1-IRBUILDER: omp.precond.then: +// 
CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 [[TMP24]], i32 1, i32 1) +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK1-IRBUILDER-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.body: +// CHECK1-IRBUILDER-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-IRBUILDER-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV16]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.cond30: +// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 +// 
CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body33: +// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM36]] +// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX37]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] +// CHECK1-IRBUILDER: omp.body.continue38: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc39: +// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK1-IRBUILDER-NEXT: call void 
@__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-IRBUILDER: omp.inner.for.end42: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1-IRBUILDER: omp.dispatch.inc: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1-IRBUILDER: omp.dispatch.end: +// CHECK1-IRBUILDER-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-IRBUILDER-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-IRBUILDER: .omp.final.then: +// CHECK1-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK1-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK1-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 +// CHECK1-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1-IRBUILDER: .omp.final.done: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1-IRBUILDER: omp.precond.end: +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK1-IRBUILDER-LABEL: define 
{{[^@]+}}@__captured_stmt +// CHECK1-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK1-IRBUILDER-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK1-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK1-IRBUILDER-NEXT: entry: +// CHECK1-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ +// CHECK2-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// 
CHECK2-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK2-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 
0 +// CHECK2-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.body: +// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body: +// CHECK2-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 +// CHECK2-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] +// CHECK2-IRBUILDER-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK2-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 [[IDXPROM]] +// CHECK2-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK2-IRBUILDER-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[IDXPROM3]] +// CHECK2-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK2-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM6]] +// CHECK2-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP13]] +// CHECK2-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM9]] +// CHECK2-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2-IRBUILDER: omp.body.continue: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc: +// CHECK2-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-IRBUILDER-NEXT: call 
void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2-IRBUILDER: omp.inner.for.end: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.inc: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK2-IRBUILDER: omp.dispatch.end: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK2-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@_Z8dynamic1PfS_S_S_ +// CHECK2-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 0, 
i64* [[DOTOMP_LB]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK2-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK2-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.body: +// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], 1 +// CHECK2-IRBUILDER-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP2]], [[ADD]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body: +// CHECK2-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_IV]], 
align 8 +// CHECK2-IRBUILDER-NEXT: [[MUL:%.*]] = mul i64 [[TMP4]], 127 +// CHECK2-IRBUILDER-NEXT: [[ADD2:%.*]] = add i64 131071, [[MUL]] +// CHECK2-IRBUILDER-NEXT: store i64 [[ADD2]], i64* [[I]], align 8 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK2-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP6:%.*]] = load i64, i64* [[I]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 [[TMP6]] +// CHECK2-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[I]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[TMP9]] +// CHECK2-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK2-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] +// CHECK2-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]] +// CHECK2-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] +// CHECK2-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 +// 
CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2-IRBUILDER: omp.body.continue: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc: +// CHECK2-IRBUILDER-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 +// CHECK2-IRBUILDER-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2-IRBUILDER: omp.inner.for.end: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.inc: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK2-IRBUILDER: omp.dispatch.end: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK2-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@_Z9test_autoPfS_S_S_ +// CHECK2-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[D_ADDR:%.*]] = 
alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK2-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK2-IRBUILDER-NEXT: [[X6:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I8:%.*]] = alloca i8, align 1 +// CHECK2-IRBUILDER-NEXT: [[X9:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK2-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[TMP0]] to i8 +// CHECK2-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-IRBUILDER-NEXT: [[CONV3:%.*]] = sext i8 [[TMP1]] to i32 +// CHECK2-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] +// CHECK2-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK2-IRBUILDER-NEXT: 
[[CONV4:%.*]] = zext i32 [[DIV]] to i64 +// CHECK2-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 +// CHECK2-IRBUILDER-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK2-IRBUILDER-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-IRBUILDER-NEXT: store i8 [[TMP2]], i8* [[I]], align 1 +// CHECK2-IRBUILDER-NEXT: store i32 11, i32* [[X6]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-IRBUILDER-NEXT: [[CONV7:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK2-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV7]], 57 +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2-IRBUILDER: omp.precond.then: +// CHECK2-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 [[TMP4]], i64* [[DOTOMP_UB]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741894, i64 0, i64 [[TMP5]], i64 1, i64 1) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK2-IRBUILDER-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32* [[DOTOMP_IS_LAST]], i64* 
[[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK2-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.body: +// CHECK2-IRBUILDER-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-IRBUILDER-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-IRBUILDER-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP8]], [[TMP9]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body: +// CHECK2-IRBUILDER-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-IRBUILDER-NEXT: [[CONV12:%.*]] = sext i8 [[TMP10]] to i64 +// CHECK2-IRBUILDER-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[DIV13:%.*]] = sdiv i64 [[TMP11]], 11 +// CHECK2-IRBUILDER-NEXT: [[MUL14:%.*]] = mul nsw i64 [[DIV13]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD15:%.*]] = add nsw i64 [[CONV12]], [[MUL14]] +// CHECK2-IRBUILDER-NEXT: [[CONV16:%.*]] = trunc i64 [[ADD15]] to i8 +// CHECK2-IRBUILDER-NEXT: store i8 [[CONV16]], i8* [[I8]], align 1 +// CHECK2-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP13]], 11 +// CHECK2-IRBUILDER-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 11 +// CHECK2-IRBUILDER-NEXT: [[SUB19:%.*]] = sub nsw i64 [[TMP12]], [[MUL18]] +// CHECK2-IRBUILDER-NEXT: [[MUL20:%.*]] = mul nsw i64 [[SUB19]], 1 +// CHECK2-IRBUILDER-NEXT: [[SUB21:%.*]] = sub 
nsw i64 11, [[MUL20]] +// CHECK2-IRBUILDER-NEXT: [[CONV22:%.*]] = trunc i64 [[SUB21]] to i32 +// CHECK2-IRBUILDER-NEXT: store i32 [[CONV22]], i32* [[X9]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK2-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP15:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP15]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] +// CHECK2-IRBUILDER-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP17:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP18:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP18]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM24]] +// CHECK2-IRBUILDER-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX25]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL26:%.*]] = fmul float [[TMP16]], [[TMP19]] +// CHECK2-IRBUILDER-NEXT: [[TMP20:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP21:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP21]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM27]] +// CHECK2-IRBUILDER-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX28]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL29:%.*]] = fmul float [[MUL26]], [[TMP22]] +// CHECK2-IRBUILDER-NEXT: [[TMP23:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP24:%.*]] = load i8, i8* [[I8]], align 1 +// 
CHECK2-IRBUILDER-NEXT: [[IDXPROM30:%.*]] = sext i8 [[TMP24]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM30]] +// CHECK2-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2-IRBUILDER: omp.body.continue: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc: +// CHECK2-IRBUILDER-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[ADD32:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK2-IRBUILDER-NEXT: store i64 [[ADD32]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2-IRBUILDER: omp.inner.for.end: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.inc: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK2-IRBUILDER: omp.dispatch.end: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2-IRBUILDER: omp.precond.end: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK2-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@_Z7runtimePfS_S_S_ +// CHECK2-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* 
noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK2-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK2-IRBUILDER-NEXT: [[X2:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741893, i32 0, i32 199, i32 1, i32 1) +// CHECK2-IRBUILDER-NEXT: 
br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK2-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK2-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.body: +// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body: +// CHECK2-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 20 +// CHECK2-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] +// CHECK2-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK2-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[I]], align 1 +// CHECK2-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP6]], 20 +// CHECK2-IRBUILDER-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 20 +// 
CHECK2-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], [[MUL5]] +// CHECK2-IRBUILDER-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD7:%.*]] = add nsw i32 -10, [[MUL6]] +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD7]], i32* [[X2]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK2-IRBUILDER-NEXT: [[TMP7:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP8:%.*]] = load i8, i8* [[I]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP7]], i64 [[IDXPROM]] +// CHECK2-IRBUILDER-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP10:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP11:%.*]] = load i8, i8* [[I]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[IDXPROM9]] +// CHECK2-IRBUILDER-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]] +// CHECK2-IRBUILDER-NEXT: [[TMP13:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM12]] +// CHECK2-IRBUILDER-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX13]], align 4 +// CHECK2-IRBUILDER-NEXT: [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]] +// CHECK2-IRBUILDER-NEXT: [[TMP16:%.*]] = load float*, float** [[A_ADDR]], 
align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM15]] +// CHECK2-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2-IRBUILDER: omp.body.continue: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc: +// CHECK2-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM18:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM18]]) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2-IRBUILDER: omp.inner.for.end: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.inc: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK2-IRBUILDER: omp.dispatch.end: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK2-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@_Z8foo_simdii +// CHECK2-IRBUILDER-SAME: (i32 noundef [[LOW:%.*]], i32 noundef [[UP:%.*]]) 
#[[ATTR0]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[LOW_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[UP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IV16:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[_TMP17:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_20:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I26:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: [[I28:%.*]] = alloca i32, align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-IRBUILDER-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 [[TMP2]], [[TMP3]] +// CHECK2-IRBUILDER-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK2-IRBUILDER-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP4]], i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-IRBUILDER-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[TMP6]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK2-IRBUILDER: simd.if.then: +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body: +// CHECK2-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], 
[[MUL]] +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK2-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2-IRBUILDER: omp.body.continue: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc: +// CHECK2-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-IRBUILDER: omp.inner.for.end: +// CHECK2-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-IRBUILDER-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-IRBUILDER-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], 1 +// CHECK2-IRBUILDER-NEXT: [[MUL14:%.*]] = mul i32 [[DIV13]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD15:%.*]] = add i32 [[TMP13]], [[MUL14]] +// 
CHECK2-IRBUILDER-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[SIMD_IF_END]] +// CHECK2-IRBUILDER: simd.if.end: +// CHECK2-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP17:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK2-IRBUILDER-NEXT: [[SUB21:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// CHECK2-IRBUILDER-NEXT: [[SUB22:%.*]] = sub i32 [[SUB21]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD23:%.*]] = add i32 [[SUB22]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV24:%.*]] = udiv i32 [[ADD23]], 1 +// CHECK2-IRBUILDER-NEXT: [[SUB25:%.*]] = sub i32 [[DIV24]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP20]], i32* [[I26]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-IRBUILDER-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2-IRBUILDER: omp.precond.then: +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 [[TMP24]], i32 1, i32 1) +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK2-IRBUILDER-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK2-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK2-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.body: +// CHECK2-IRBUILDER-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-IRBUILDER-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV16]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.cond30: +// CHECK2-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 +// CHECK2-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body33: +// CHECK2-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// 
CHECK2-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]] +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD35]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM36]] +// CHECK2-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX37]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.body33.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] +// CHECK2-IRBUILDER: omp.body.continue38: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc39: +// CHECK2-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-IRBUILDER: omp.inner.for.end42: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2-IRBUILDER: omp.dispatch.inc: +// CHECK2-IRBUILDER-NEXT: br label 
[[OMP_DISPATCH_COND]] +// CHECK2-IRBUILDER: omp.dispatch.end: +// CHECK2-IRBUILDER-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK2-IRBUILDER-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-IRBUILDER: .omp.final.then: +// CHECK2-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK2-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK2-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 +// CHECK2-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2-IRBUILDER: .omp.final.done: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2-IRBUILDER: omp.precond.end: +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK2-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK2-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 
+// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK2-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: ret void +// +// +// CHECK2-IRBUILDER-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK2-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK2-IRBUILDER-NEXT: entry: +// CHECK2-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK2-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: ret void +// +// // CHECK3-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ // CHECK3-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: @@ -1400,7 +2586,7 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], 
float** [[C_ADDR]], align 8 @@ -1409,12 +2595,9 @@ // CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) -// CHECK3-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1432,7 +2615,6 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -1464,7 +2646,6 @@ // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: 
store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1472,9 +2653,7 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) -// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1492,7 +2671,7 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1501,11 +2680,9 @@ // CHECK3-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: 
omp.dispatch.cond: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1524,7 +2701,6 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK3-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -1552,7 +2728,6 @@ // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1560,9 +2735,7 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret 
void // // @@ -1588,7 +2761,7 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1621,11 +2794,9 @@ // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1656,7 +2827,6 @@ // CHECK3-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK3-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: 
[[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -1688,7 +2858,6 @@ // CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK3-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1698,9 +2867,7 @@ // CHECK3: omp.dispatch.end: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1721,7 +2888,7 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1731,11 +2898,9 @@ // CHECK3-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1763,7 +2928,6 @@ // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -1795,7 +2959,6 @@ // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1803,9 +2966,7 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1832,7 +2993,7 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK3-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -1919,11 +3080,9 @@ // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1931,15 +3090,13 @@ // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 
[[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK3-IRBUILDER: omp.inner.for.cond30: -// CHECK3-NORMAL: omp.inner.for.cond29: +// CHECK3: omp.inner.for.cond29: // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK3-IRBUILDER: omp.inner.for.body33: -// CHECK3-NORMAL: omp.inner.for.body32: +// CHECK3: omp.inner.for.body32: // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -1951,19 +3108,15 @@ // CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 // CHECK3-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK3-IRBUILDER: omp.body.continue38: -// CHECK3-NORMAL: omp.body.continue37: +// CHECK3: omp.body.continue37: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK3-IRBUILDER: omp.inner.for.inc39: -// CHECK3-NORMAL: omp.inner.for.inc38: +// CHECK3: omp.inner.for.inc38: // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK3-IRBUILDER: omp.inner.for.end42: -// CHECK3-NORMAL: omp.inner.for.end40: +// CHECK3: omp.inner.for.end40: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] @@ -1986,9 +3139,7 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -2032,7 +3183,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2041,12 +3192,9 @@ // CHECK4-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) -// CHECK4-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2064,7 +3212,6 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -2096,7 +3243,6 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2104,9 +3250,7 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// 
CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) -// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2124,7 +3268,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2133,11 +3277,9 @@ // CHECK4-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2156,7 +3298,6 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK4-NEXT: 
[[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -2184,7 +3325,6 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2192,9 +3332,7 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2220,7 +3358,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK4-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** 
[[C_ADDR]], align 8 @@ -2253,11 +3391,9 @@ // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2288,7 +3424,6 @@ // CHECK4-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK4-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK4-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -2320,7 +3455,6 @@ // CHECK4-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK4-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2330,9 +3464,7 @@ // CHECK4: omp.dispatch.end: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2353,7 +3485,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK4-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2363,11 +3495,9 @@ // CHECK4-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // 
CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2395,7 +3525,6 @@ // CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK4-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -2427,7 +3556,6 @@ // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2435,9 +3563,7 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2464,7 +3590,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// 
CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK4-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -2551,11 +3677,9 @@ // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK4-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2563,15 +3687,13 @@ // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK4-IRBUILDER: omp.inner.for.cond30: -// CHECK4-NORMAL: omp.inner.for.cond29: +// CHECK4: omp.inner.for.cond29: // CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK4-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK4-NEXT: br i1 [[CMP31]], label 
[[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK4-IRBUILDER: omp.inner.for.body33: -// CHECK4-NORMAL: omp.inner.for.body32: +// CHECK4: omp.inner.for.body32: // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -2583,19 +3705,15 @@ // CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 // CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK4-IRBUILDER: omp.body.continue38: -// CHECK4-NORMAL: omp.body.continue37: +// CHECK4: omp.body.continue37: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK4-IRBUILDER: omp.inner.for.inc39: -// CHECK4-NORMAL: omp.inner.for.inc38: +// CHECK4: omp.inner.for.inc38: // CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK4-IRBUILDER: omp.inner.for.end42: -// CHECK4-NORMAL: omp.inner.for.end40: +// CHECK4: omp.inner.for.end40: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] @@ -2618,9 +3736,7 @@ // CHECK4: .omp.final.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// 
CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2650,6 +3766,1262 @@ // CHECK4-NEXT: ret void // // +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ +// CHECK3-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK3-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.body: +// CHECK3-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body: +// CHECK3-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 +// CHECK3-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] +// CHECK3-IRBUILDER-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void 
@__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[IDXPROM3]] +// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM6]] +// CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP13]] +// CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM9]] +// CHECK3-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// 
CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3-IRBUILDER: omp.body.continue: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc: +// CHECK3-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3-IRBUILDER: omp.inner.for.end: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.inc: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3-IRBUILDER: omp.dispatch.end: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@_Z8dynamic1PfS_S_S_ +// CHECK3-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 67, i64 0, i64 16908287, i64 1, i64 1) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK3-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK3-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.body: +// CHECK3-IRBUILDER-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], 1 +// CHECK3-IRBUILDER-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP2]], [[ADD]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body: +// CHECK3-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul i64 [[TMP4]], 127 +// CHECK3-IRBUILDER-NEXT: [[ADD2:%.*]] = add i64 131071, [[MUL]] +// CHECK3-IRBUILDER-NEXT: store i64 [[ADD2]], i64* [[I]], align 8 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK3-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP6:%.*]] = load i64, i64* [[I]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 [[TMP6]] +// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[I]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[TMP9]] +// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = 
load float*, float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] +// CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]] +// CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] +// CHECK3-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3-IRBUILDER: omp.body.continue: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc: +// CHECK3-IRBUILDER-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 +// CHECK3-IRBUILDER-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3-IRBUILDER: omp.inner.for.end: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.inc: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3-IRBUILDER: omp.dispatch.end: +// CHECK3-IRBUILDER-NEXT: 
[[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@_Z9test_autoPfS_S_S_ +// CHECK3-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK3-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK3-IRBUILDER-NEXT: [[X6:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I8:%.*]] = alloca i8, align 1 +// CHECK3-IRBUILDER-NEXT: [[X9:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store 
float* [[D]], float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK3-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[TMP0]] to i8 +// CHECK3-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-IRBUILDER-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-IRBUILDER-NEXT: [[CONV3:%.*]] = sext i8 [[TMP1]] to i32 +// CHECK3-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] +// CHECK3-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK3-IRBUILDER-NEXT: [[CONV4:%.*]] = zext i32 [[DIV]] to i64 +// CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 +// CHECK3-IRBUILDER-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-IRBUILDER-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-IRBUILDER-NEXT: store i8 [[TMP2]], i8* [[I]], align 1 +// CHECK3-IRBUILDER-NEXT: store i32 11, i32* [[X6]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-IRBUILDER-NEXT: [[CONV7:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV7]], 57 +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3-IRBUILDER: omp.precond.then: +// CHECK3-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 [[TMP4]], i64* [[DOTOMP_UB]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP5:%.*]] = load 
i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 70, i64 0, i64 [[TMP5]], i64 1, i64 1) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK3-IRBUILDER-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK3-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK3-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.body: +// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-IRBUILDER-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-IRBUILDER-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP8]], [[TMP9]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body: +// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-IRBUILDER-NEXT: [[CONV12:%.*]] = sext i8 [[TMP10]] to i64 +// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[DIV13:%.*]] = sdiv i64 [[TMP11]], 11 +// CHECK3-IRBUILDER-NEXT: [[MUL14:%.*]] = 
mul nsw i64 [[DIV13]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD15:%.*]] = add nsw i64 [[CONV12]], [[MUL14]] +// CHECK3-IRBUILDER-NEXT: [[CONV16:%.*]] = trunc i64 [[ADD15]] to i8 +// CHECK3-IRBUILDER-NEXT: store i8 [[CONV16]], i8* [[I8]], align 1 +// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP13]], 11 +// CHECK3-IRBUILDER-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 11 +// CHECK3-IRBUILDER-NEXT: [[SUB19:%.*]] = sub nsw i64 [[TMP12]], [[MUL18]] +// CHECK3-IRBUILDER-NEXT: [[MUL20:%.*]] = mul nsw i64 [[SUB19]], 1 +// CHECK3-IRBUILDER-NEXT: [[SUB21:%.*]] = sub nsw i64 11, [[MUL20]] +// CHECK3-IRBUILDER-NEXT: [[CONV22:%.*]] = trunc i64 [[SUB21]] to i32 +// CHECK3-IRBUILDER-NEXT: store i32 [[CONV22]], i32* [[X9]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP15]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP17:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP18:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP18]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM24]] +// CHECK3-IRBUILDER-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX25]], align 4 +// 
CHECK3-IRBUILDER-NEXT: [[MUL26:%.*]] = fmul float [[TMP16]], [[TMP19]] +// CHECK3-IRBUILDER-NEXT: [[TMP20:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP21:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP21]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM27]] +// CHECK3-IRBUILDER-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX28]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL29:%.*]] = fmul float [[MUL26]], [[TMP22]] +// CHECK3-IRBUILDER-NEXT: [[TMP23:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP24:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM30:%.*]] = sext i8 [[TMP24]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM30]] +// CHECK3-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3-IRBUILDER: omp.body.continue: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc: +// CHECK3-IRBUILDER-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[ADD32:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-IRBUILDER-NEXT: store i64 [[ADD32]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// 
CHECK3-IRBUILDER: omp.inner.for.end: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.inc: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3-IRBUILDER: omp.dispatch.end: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3-IRBUILDER: omp.precond.end: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@_Z7runtimePfS_S_S_ +// CHECK3-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK3-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK3-IRBUILDER-NEXT: [[X2:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// 
CHECK3-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 69, i32 0, i32 199, i32 1, i32 1) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK3-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.body: +// CHECK3-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-IRBUILDER: 
omp.inner.for.body: +// CHECK3-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 20 +// CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] +// CHECK3-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK3-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[I]], align 1 +// CHECK3-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP6]], 20 +// CHECK3-IRBUILDER-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 20 +// CHECK3-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], [[MUL5]] +// CHECK3-IRBUILDER-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD7:%.*]] = add nsw i32 -10, [[MUL6]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD7]], i32* [[X2]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i8, i8* [[I]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP7]], i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i8, i8* [[I]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[IDXPROM9]] +// 
CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]] +// CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM12]] +// CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX13]], align 4 +// CHECK3-IRBUILDER-NEXT: [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]] +// CHECK3-IRBUILDER-NEXT: [[TMP16:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM15]] +// CHECK3-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3-IRBUILDER: omp.body.continue: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc: +// CHECK3-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM18:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM18]]) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3-IRBUILDER: omp.inner.for.end: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.inc: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3-IRBUILDER: omp.dispatch.end: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@_Z8foo_simdii +// CHECK3-IRBUILDER-SAME: (i32 noundef [[LOW:%.*]], i32 noundef [[UP:%.*]]) #[[ATTR0]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[LOW_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[UP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IV16:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[_TMP17:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_20:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I26:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: [[I28:%.*]] = alloca i32, align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 [[TMP2]], [[TMP3]] +// CHECK3-IRBUILDER-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK3-IRBUILDER-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP4]], i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-IRBUILDER-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[TMP6]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK3-IRBUILDER: simd.if.then: +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 +// CHECK3-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body: +// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3-IRBUILDER: omp.body.continue: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc: +// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-IRBUILDER: omp.inner.for.end: +// CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-IRBUILDER-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK3-IRBUILDER-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], 1 +// CHECK3-IRBUILDER-NEXT: [[MUL14:%.*]] = mul i32 [[DIV13]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD15:%.*]] = add i32 [[TMP13]], [[MUL14]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[SIMD_IF_END]] +// CHECK3-IRBUILDER: simd.if.end: +// CHECK3-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP17:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK3-IRBUILDER-NEXT: [[SUB21:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// CHECK3-IRBUILDER-NEXT: [[SUB22:%.*]] = sub i32 [[SUB21]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD23:%.*]] = add i32 [[SUB22]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV24:%.*]] = udiv i32 [[ADD23]], 1 +// CHECK3-IRBUILDER-NEXT: [[SUB25:%.*]] = sub i32 [[DIV24]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP20]], i32* [[I26]], align 4 
+// CHECK3-IRBUILDER-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK3-IRBUILDER-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3-IRBUILDER: omp.precond.then: +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 [[TMP24]], i32 1, i32 1) +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK3-IRBUILDER-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.body: +// CHECK3-IRBUILDER-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-IRBUILDER-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV16]], 
align 4 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.cond30: +// CHECK3-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 +// CHECK3-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body33: +// CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD35]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM36]] +// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX37]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.body33.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] +// CHECK3-IRBUILDER: omp.body.continue38: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc39: +// CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], 
align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-IRBUILDER: omp.inner.for.end42: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3-IRBUILDER: omp.dispatch.inc: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3-IRBUILDER: omp.dispatch.end: +// CHECK3-IRBUILDER-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-IRBUILDER-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-IRBUILDER: .omp.final.then: +// CHECK3-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK3-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK3-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 +// CHECK3-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3-IRBUILDER: .omp.final.done: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] 
+// CHECK3-IRBUILDER: omp.precond.end: +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK3-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK3-IRBUILDER-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK3-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK3-IRBUILDER-NEXT: entry: +// CHECK3-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK3-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define 
{{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ +// CHECK4-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK4-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK4-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.body: +// CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body: +// CHECK4-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 +// CHECK4-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] +// CHECK4-IRBUILDER-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
float, float* [[TMP5]], i64 [[IDXPROM]] +// CHECK4-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[IDXPROM3]] +// CHECK4-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK4-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM6]] +// CHECK4-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP13]] +// CHECK4-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM9]] +// CHECK4-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4-IRBUILDER: omp.body.continue: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc: +// 
CHECK4-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4-IRBUILDER: omp.inner.for.end: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.inc: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK4-IRBUILDER: omp.dispatch.end: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK4-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define {{[^@]+}}@_Z8dynamic1PfS_S_S_ +// CHECK4-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: 
[[I:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 67, i64 0, i64 16908287, i64 1, i64 1) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK4-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK4-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.body: +// CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], 
align 8 +// CHECK4-IRBUILDER-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], 1 +// CHECK4-IRBUILDER-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP2]], [[ADD]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body: +// CHECK4-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[MUL:%.*]] = mul i64 [[TMP4]], 127 +// CHECK4-IRBUILDER-NEXT: [[ADD2:%.*]] = add i64 131071, [[MUL]] +// CHECK4-IRBUILDER-NEXT: store i64 [[ADD2]], i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK4-IRBUILDER-NEXT: [[TMP5:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP6:%.*]] = load i64, i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 [[TMP6]] +// CHECK4-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP8:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[TMP9]] +// CHECK4-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] +// CHECK4-IRBUILDER-NEXT: [[TMP11:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] +// CHECK4-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]] +// 
CHECK4-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] +// CHECK4-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4-IRBUILDER: omp.body.continue: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc: +// CHECK4-IRBUILDER-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 +// CHECK4-IRBUILDER-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4-IRBUILDER: omp.inner.for.end: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.inc: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK4-IRBUILDER: omp.dispatch.end: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) +// CHECK4-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define {{[^@]+}}@_Z9test_autoPfS_S_S_ +// CHECK4-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], 
float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK4-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK4-IRBUILDER-NEXT: [[X6:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I8:%.*]] = alloca i8, align 1 +// CHECK4-IRBUILDER-NEXT: [[X9:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK4-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[TMP0]] to i8 +// CHECK4-IRBUILDER-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// 
CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-IRBUILDER-NEXT: [[CONV3:%.*]] = sext i8 [[TMP1]] to i32 +// CHECK4-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] +// CHECK4-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK4-IRBUILDER-NEXT: [[CONV4:%.*]] = zext i32 [[DIV]] to i64 +// CHECK4-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 +// CHECK4-IRBUILDER-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK4-IRBUILDER-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-IRBUILDER-NEXT: store i8 [[TMP2]], i8* [[I]], align 1 +// CHECK4-IRBUILDER-NEXT: store i32 11, i32* [[X6]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-IRBUILDER-NEXT: [[CONV7:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK4-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV7]], 57 +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4-IRBUILDER: omp.precond.then: +// CHECK4-IRBUILDER-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 [[TMP4]], i64* [[DOTOMP_UB]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 70, i64 0, i64 [[TMP5]], i64 1, i64 1) +// CHECK4-IRBUILDER-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK4-IRBUILDER-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK4-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK4-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.body: +// CHECK4-IRBUILDER-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK4-IRBUILDER-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK4-IRBUILDER-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP8]], [[TMP9]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body: +// CHECK4-IRBUILDER-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-IRBUILDER-NEXT: [[CONV12:%.*]] = sext i8 [[TMP10]] to i64 +// CHECK4-IRBUILDER-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[DIV13:%.*]] = sdiv i64 [[TMP11]], 11 +// CHECK4-IRBUILDER-NEXT: [[MUL14:%.*]] = mul nsw i64 [[DIV13]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD15:%.*]] = add nsw i64 [[CONV12]], [[MUL14]] +// CHECK4-IRBUILDER-NEXT: [[CONV16:%.*]] = trunc i64 [[ADD15]] to i8 +// CHECK4-IRBUILDER-NEXT: store i8 [[CONV16]], i8* [[I8]], align 1 +// CHECK4-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP13:%.*]] = load 
i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP13]], 11 +// CHECK4-IRBUILDER-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 11 +// CHECK4-IRBUILDER-NEXT: [[SUB19:%.*]] = sub nsw i64 [[TMP12]], [[MUL18]] +// CHECK4-IRBUILDER-NEXT: [[MUL20:%.*]] = mul nsw i64 [[SUB19]], 1 +// CHECK4-IRBUILDER-NEXT: [[SUB21:%.*]] = sub nsw i64 11, [[MUL20]] +// CHECK4-IRBUILDER-NEXT: [[CONV22:%.*]] = trunc i64 [[SUB21]] to i32 +// CHECK4-IRBUILDER-NEXT: store i32 [[CONV22]], i32* [[X9]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK4-IRBUILDER-NEXT: [[TMP14:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP15:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP15]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] +// CHECK4-IRBUILDER-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP17:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP18:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP18]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM24]] +// CHECK4-IRBUILDER-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX25]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL26:%.*]] = fmul float [[TMP16]], [[TMP19]] +// CHECK4-IRBUILDER-NEXT: [[TMP20:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP21:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP21]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* 
[[TMP20]], i64 [[IDXPROM27]] +// CHECK4-IRBUILDER-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX28]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL29:%.*]] = fmul float [[MUL26]], [[TMP22]] +// CHECK4-IRBUILDER-NEXT: [[TMP23:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP24:%.*]] = load i8, i8* [[I8]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM30:%.*]] = sext i8 [[TMP24]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM30]] +// CHECK4-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4-IRBUILDER: omp.body.continue: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc: +// CHECK4-IRBUILDER-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[ADD32:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK4-IRBUILDER-NEXT: store i64 [[ADD32]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4-IRBUILDER: omp.inner.for.end: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.inc: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK4-IRBUILDER: omp.dispatch.end: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4-IRBUILDER: omp.precond.end: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK4-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define {{[^@]+}}@_Z7runtimePfS_S_S_ +// CHECK4-IRBUILDER-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-IRBUILDER-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK4-IRBUILDER-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I:%.*]] = alloca i8, align 1 +// CHECK4-IRBUILDER-NEXT: [[X2:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 69, i32 0, i32 199, i32 1, i32 1) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK4-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +// CHECK4-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.body: +// CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-IRBUILDER-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body: +// CHECK4-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 20 +// CHECK4-IRBUILDER-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] +// CHECK4-IRBUILDER-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK4-IRBUILDER-NEXT: 
store i8 [[CONV]], i8* [[I]], align 1 +// CHECK4-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP6]], 20 +// CHECK4-IRBUILDER-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 20 +// CHECK4-IRBUILDER-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], [[MUL5]] +// CHECK4-IRBUILDER-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD7:%.*]] = add nsw i32 -10, [[MUL6]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD7]], i32* [[X2]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK4-IRBUILDER-NEXT: [[TMP7:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP8:%.*]] = load i8, i8* [[I]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP7]], i64 [[IDXPROM]] +// CHECK4-IRBUILDER-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP10:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP11:%.*]] = load i8, i8* [[I]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[IDXPROM9]] +// CHECK4-IRBUILDER-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]] +// CHECK4-IRBUILDER-NEXT: [[TMP13:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64 +// 
CHECK4-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM12]] +// CHECK4-IRBUILDER-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX13]], align 4 +// CHECK4-IRBUILDER-NEXT: [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]] +// CHECK4-IRBUILDER-NEXT: [[TMP16:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM15]] +// CHECK4-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4-IRBUILDER: omp.body.continue: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc: +// CHECK4-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM18:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM18]]) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4-IRBUILDER: omp.inner.for.end: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.inc: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK4-IRBUILDER: omp.dispatch.end: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK4-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define {{[^@]+}}@_Z8foo_simdii +// CHECK4-IRBUILDER-SAME: (i32 noundef [[LOW:%.*]], i32 noundef [[UP:%.*]]) #[[ATTR0]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[LOW_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[UP_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IV16:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[_TMP17:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTCAPTURE_EXPR_20:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I26:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: [[I28:%.*]] = alloca i32, align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// 
CHECK4-IRBUILDER-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-IRBUILDER-NEXT: [[SUB:%.*]] = sub i32 [[TMP2]], [[TMP3]] +// CHECK4-IRBUILDER-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 +// CHECK4-IRBUILDER-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP4]], i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-IRBUILDER-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[TMP6]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK4-IRBUILDER: simd.if.then: +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 +// CHECK4-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-IRBUILDER: 
omp.inner.for.body: +// CHECK4-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK4-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4-IRBUILDER: omp.body.continue: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc: +// CHECK4-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-IRBUILDER: omp.inner.for.end: +// CHECK4-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-IRBUILDER-NEXT: [[SUB10:%.*]] = sub i32 
[[TMP14]], [[TMP15]] +// CHECK4-IRBUILDER-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], 1 +// CHECK4-IRBUILDER-NEXT: [[MUL14:%.*]] = mul i32 [[DIV13]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD15:%.*]] = add i32 [[TMP13]], [[MUL14]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[SIMD_IF_END]] +// CHECK4-IRBUILDER: simd.if.end: +// CHECK4-IRBUILDER-NEXT: [[TMP16:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP17:%.*]] = load i32, i32* [[UP_ADDR]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK4-IRBUILDER-NEXT: [[SUB21:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// CHECK4-IRBUILDER-NEXT: [[SUB22:%.*]] = sub i32 [[SUB21]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD23:%.*]] = add i32 [[SUB22]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV24:%.*]] = udiv i32 [[ADD23]], 1 +// CHECK4-IRBUILDER-NEXT: [[SUB25:%.*]] = sub i32 [[DIV24]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP20]], i32* [[I26]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK4-IRBUILDER-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4-IRBUILDER: omp.precond.then: +// 
CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 66, i32 0, i32 [[TMP24]], i32 1, i32 1) +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK4-IRBUILDER-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK4-IRBUILDER-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK4-IRBUILDER-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.body: +// CHECK4-IRBUILDER-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-IRBUILDER-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV16]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.cond30: +// CHECK4-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 +// 
CHECK4-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body33: +// CHECK4-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD35]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM36]] +// CHECK4-IRBUILDER-NEXT: store float 0.000000e+00, float* [[ARRAYIDX37]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.body33.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] +// CHECK4-IRBUILDER: omp.body.continue38: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc39: +// CHECK4-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK4-IRBUILDER-NEXT: call void 
@__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-IRBUILDER: omp.inner.for.end42: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK4-IRBUILDER: omp.dispatch.inc: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK4-IRBUILDER: omp.dispatch.end: +// CHECK4-IRBUILDER-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK4-IRBUILDER-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK4-IRBUILDER: .omp.final.then: +// CHECK4-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 +// CHECK4-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK4-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 +// CHECK4-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK4-IRBUILDER: .omp.final.done: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4-IRBUILDER: omp.precond.end: +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK4-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define 
{{[^@]+}}@__captured_stmt +// CHECK4-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK4-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK4-IRBUILDER-NEXT: ret void +// +// +// CHECK4-IRBUILDER-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK4-IRBUILDER-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK4-IRBUILDER-NEXT: entry: +// CHECK4-IRBUILDER-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-IRBUILDER-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK4-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK4-IRBUILDER-NEXT: ret void +// +// // CHECK5-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ // CHECK5-SAME: (float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -23,6 +23,52 @@ namespace llvm { class CanonicalLoopInfo; +/// Move the instructions after an InsertPoint to the beginning of another +/// BasicBlock. +/// +/// The instructions after \p IP are moved to the beginning of \p New which must +/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to +/// \p New will be added such that there is no semantic change. Otherwise, the +/// \p IP insert block remains degenerate and it is up to the caller to insert a +/// terminator. +void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, + bool CreateBranch); + +/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new +/// insert location will stick to after the instruction before the insertion +/// point (instead of moving with the instruction the InsertPoint stores +/// internally). +void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); + +/// Split a BasicBlock at an InsertPoint, even if the block is degenerate +/// (missing the terminator). +/// +/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed +/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch +/// is true, a branch to the new successor will be created such that +/// semantically there is no change; otherwise the block of the insertion point +/// remains degenerate and it is the caller's responsibility to insert a +/// terminator. Returns the new successor block. +BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, + llvm::Twine Name = {}); + +/// Split a BasicBlock at \p Builder's insertion point, even if the block is +/// degenerate (missing the terminator). Its new insert location will stick to +/// after the instruction before the insertion point (instead of moving with the +/// instruction the InsertPoint stores internally). 
+BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, + llvm::Twine Name = {}); + +/// Split a BasicBlock at \p Builder's insertion point, even if the block is +/// degenerate (missing the terminator). Its new insert location will stick to +/// after the instruction before the insertion point (instead of moving with the +/// instruction the InsertPoint stores internally). +BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); + +/// Like splitBB, but reuses the current block's name for the new name. +BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, + llvm::Twine Suffix = ".split"); + /// An interface to create LLVM-IR for OpenMP directives. /// /// Each OpenMP directive has a corresponding public generator method. @@ -87,27 +133,36 @@ /// Callback type for body (=inner region) code generation /// /// The callback takes code locations as arguments, each describing a - /// location at which code might need to be generated or a location that is - /// the target of control transfer. + /// location where additional instructions can be inserted. + /// + /// The CodeGenIP may be in the middle of a basic block or point to the end of + /// it. The basic block may have a terminator or be degenerate. The callback + /// function may just insert instructions at that position, but also split the + /// block (without the Before argument of BasicBlock::splitBasicBlock such + /// that the identity of the split predecessor block is preserved) and insert + /// additional control flow, including branches that do not lead back to what + /// follows the CodeGenIP. Note that since the callback is allowed to split + /// the block, callers must assume that InsertPoints to positions in the + /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If + /// such InsertPoints need to be preserved, the caller can split the block itself + /// before calling the callback. 
+ /// + /// AllocaIP and CodeGenIP must not point to the same position. /// /// \param AllocaIP is the insertion point at which new alloca instructions - /// should be placed. + /// should be placed. The BasicBlock it is pointing to must + /// not be split. /// \param CodeGenIP is the insertion point at which the body code should be /// placed. - /// \param ContinuationBB is the basic block target to leave the body. - /// - /// Note that all blocks pointed to by the arguments have terminators. using BodyGenCallbackTy = - function_ref; + function_ref; // This is created primarily for sections construct as llvm::function_ref // (BodyGenCallbackTy) is not storable (as described in the comments of // function_ref class - function_ref contains non-ownable reference // to the callable. using StorableBodyGenCallbackTy = - std::function; + std::function; /// Callback type for loop body code generation. /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -252,16 +252,8 @@ NewBr->setDebugLoc(DL); } -/// Move the instruction after an InsertPoint to the beginning of another -/// BasicBlock. -/// -/// The instructions after \p IP are moved to the beginning of \p New which must -/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to -/// \p New will be added such that there is no semantic change. Otherwise, the -/// \p IP insert block remains degenerate and it is up to the caller to insert a -/// terminator. -static void spliceBB(OpenMPIRBuilder::InsertPointTy IP, BasicBlock *New, - bool CreateBranch) { +void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, + bool CreateBranch) { assert(New->getFirstInsertionPt() == New->begin() && "Target BB must not have PHI nodes"); @@ -274,11 +266,7 @@ BranchInst::Create(New, Old); } -/// Splice a BasicBlock at an IRBuilder's current insertion point. 
Its new -/// insert location will stick to after the instruction before the insertion -/// point (instead of moving with the instruction the InsertPoint stores -/// internally). -static void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) { +void llvm::spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) { DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); BasicBlock *Old = Builder.GetInsertBlock(); @@ -293,17 +281,8 @@ Builder.SetCurrentDebugLocation(DebugLoc); } -/// Split a BasicBlock at an InsertPoint, even if the block is degenerate -/// (missing the terminator). -/// -/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed -/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch -/// is true, a branch to the new successor will new created such that -/// semantically there is no change; otherwise the block of the insertion point -/// remains degenerate and it is the caller's responsibility to insert a -/// terminator. Returns the new successor block. -static BasicBlock *splitBB(OpenMPIRBuilder::InsertPointTy IP, bool CreateBranch, - llvm::Twine Name = {}) { +BasicBlock *llvm::splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, + llvm::Twine Name) { BasicBlock *Old = IP.getBlock(); BasicBlock *New = BasicBlock::Create( Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name, @@ -313,12 +292,22 @@ return New; } -/// Split a BasicBlock at \p Builder's insertion point, even if the block is -/// degenerate (missing the terminator). Its new insert location will stick to -/// after the instruction before the insertion point (instead of moving with the -/// instruction the InsertPoint stores internally). 
-static BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, - llvm::Twine Name = {}) { +BasicBlock *llvm::splitBB(IRBuilderBase &Builder, bool CreateBranch, + llvm::Twine Name) { + DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); + BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name); + if (CreateBranch) + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + else + Builder.SetInsertPoint(Builder.GetInsertBlock()); + // SetInsertPoint also updates the Builder's debug location, but we want to + // keep the one the Builder was configured to use. + Builder.SetCurrentDebugLocation(DebugLoc); + return New; +} + +BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch, + llvm::Twine Name) { DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name); if (CreateBranch) @@ -331,6 +320,12 @@ return New; } +BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, + llvm::Twine Suffix) { + BasicBlock *Old = Builder.GetInsertBlock(); + return splitBB(Builder, CreateBranch, Old->getName() + Suffix); +} + void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); @@ -928,7 +923,7 @@ // Let the caller create the body. 
assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); - BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB); + BodyGenCB(InnerAllocaIP, CodeGenIP); LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); @@ -1268,26 +1263,25 @@ // section_loop.after: // ; auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) { - auto *CurFn = CodeGenIP.getBlock()->getParent(); - auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor(); - auto *ForExitBB = CodeGenIP.getBlock() - ->getSinglePredecessor() - ->getTerminator() - ->getSuccessor(1); - SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB); Builder.restoreIP(CodeGenIP); + BasicBlock *Continue = + splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after"); + Function *CurFn = Continue->getParent(); + SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, Continue); + unsigned CaseNumber = 0; for (auto SectionCB : SectionCBs) { - auto *CaseBB = BasicBlock::Create(M.getContext(), - "omp_section_loop.body.case", CurFn); + BasicBlock *CaseBB = BasicBlock::Create( + M.getContext(), "omp_section_loop.body.case", CurFn, Continue); SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); Builder.SetInsertPoint(CaseBB); - SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB); + BranchInst *CaseEndBr = Builder.CreateBr(Continue); + SectionCB(InsertPointTy(), + {CaseEndBr->getParent(), CaseEndBr->getIterator()}); CaseNumber++; } // remove the existing terminator from body BB since there can be no // terminators after switch/case - CodeGenIP.getBlock()->getTerminator()->eraseFromParent(); }; // Loop body ends here // LowerBound, UpperBound, and STride for createCanonicalLoop @@ -1297,29 +1291,22 @@ Value *ST = ConstantInt::get(I32Ty, 1); llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); - Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); 
- AllocaIP = Builder.saveIP(); InsertPointTy AfterIP = applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); - BasicBlock *LoopAfterBB = AfterIP.getBlock(); - Instruction *SplitPos = LoopAfterBB->getTerminator(); - if (!isa_and_nonnull(SplitPos)) - SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB); - // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB, - // which requires a BB with branch - BasicBlock *ExitBB = - LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end"); - SplitPos->eraseFromParent(); // Apply the finalization callback in LoopAfterBB auto FiniInfo = FinalizationStack.pop_back_val(); assert(FiniInfo.DK == OMPD_sections && "Unexpected finalization stack state!"); - Builder.SetInsertPoint(LoopAfterBB->getTerminator()); - FiniInfo.FiniCB(Builder.saveIP()); - Builder.SetInsertPoint(ExitBB); + if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) { + Builder.restoreIP(AfterIP); + BasicBlock *FiniBB = + splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini"); + CB(Builder.saveIP()); + AfterIP = {FiniBB, FiniBB->begin()}; + } - return Builder.saveIP(); + return AfterIP; } OpenMPIRBuilder::InsertPointTy @@ -3117,48 +3104,28 @@ // generate body BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveIP(), *FiniBB); - - // If we didn't emit a branch to FiniBB during body generation, it means - // FiniBB is unreachable (e.g. while(1);). stop generating all the - // unreachable blocks, and remove anything we are not going to use. - auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); - if (SkipEmittingRegion) { - FiniBB->eraseFromParent(); - ExitCall->eraseFromParent(); - // Discard finalization if we have it. - if (HasFinalize) { - assert(!FinalizationStack.empty() && - "Unexpected finalization stack state!"); - FinalizationStack.pop_back(); - } - } else { - // emit exit call and do any needed finalization. 
- auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); - assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && - FiniBB->getTerminator()->getSuccessor(0) == ExitBB && - "Unexpected control flow graph state!!"); - emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); - assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && - "Unexpected Control Flow State!"); - MergeBlockIntoPredecessor(FiniBB); - } + /* CodeGenIP */ Builder.saveIP()); + + // emit exit call and do any needed finalization. + auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); + assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && + FiniBB->getTerminator()->getSuccessor(0) == ExitBB && + "Unexpected control flow graph state!!"); + emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && + "Unexpected Control Flow State!"); + MergeBlockIntoPredecessor(FiniBB); // If we are skipping the region of a non conditional, remove the exit // block, and clear the builder's insertion point. assert(SplitPos->getParent() == ExitBB && "Unexpected Insertion point location!"); - if (!Conditional && SkipEmittingRegion) { - ExitBB->eraseFromParent(); - Builder.ClearInsertionPoint(); - } else { - auto merged = MergeBlockIntoPredecessor(ExitBB); - BasicBlock *ExitPredBB = SplitPos->getParent(); - auto InsertBB = merged ? ExitPredBB : ExitBB; - if (!isa_and_nonnull(SplitPos)) - SplitPos->eraseFromParent(); - Builder.SetInsertPoint(InsertBB); - } + auto merged = MergeBlockIntoPredecessor(ExitBB); + BasicBlock *ExitPredBB = SplitPos->getParent(); + auto InsertBB = merged ? 
ExitPredBB : ExitBB; + if (!isa_and_nonnull(SplitPos)) + SplitPos->eraseFromParent(); + Builder.SetInsertPoint(InsertBB); return Builder.saveIP(); } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -935,8 +935,7 @@ SmallDenseMap> BB2PRMap; BasicBlock *StartBB = nullptr, *EndBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -975,8 +974,7 @@ const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -603,8 +603,7 @@ unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumBodiesGenerated; Builder.restoreIP(AllocaIP); @@ -618,10 +617,6 @@ Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); - - Builder.SetInsertPoint(ThenTerm); - Builder.CreateBr(&ContinuationIP); - ThenTerm->eraseFromParent(); }; auto PrivCB = [&](InsertPointTy AllocaIP, 
InsertPointTy CodeGenIP, @@ -711,8 +706,7 @@ unsigned NumOuterBodiesGenerated = 0; unsigned NumFinalizationPoints = 0; - auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumInnerBodiesGenerated; }; @@ -731,8 +725,7 @@ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; - auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; Builder.restoreIP(CodeGenIP); BasicBlock *CGBB = CodeGenIP.getBlock(); @@ -807,8 +800,7 @@ unsigned NumOuterBodiesGenerated = 0; unsigned NumFinalizationPoints = 0; - auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumInnerBodiesGenerated; }; @@ -827,8 +819,7 @@ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; - auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; Builder.restoreIP(CodeGenIP); BasicBlock *CGBB = CodeGenIP.getBlock(); @@ -920,8 +911,7 @@ unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumBodiesGenerated; Builder.restoreIP(AllocaIP); @@ -933,12 +923,8 @@ Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; - SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), - &ThenTerm, 
&ElseTerm); - - Builder.SetInsertPoint(ThenTerm); - Builder.CreateBr(&ContinuationIP); - ThenTerm->eraseFromParent(); + SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm, + &ElseTerm); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -1046,8 +1032,7 @@ unsigned NumFinalizationPoints = 0; CallInst *CheckedBarrier = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumBodiesGenerated; Builder.restoreIP(CodeGenIP); @@ -1178,8 +1163,7 @@ Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); Instruction *Internal; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationBB) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { IRBuilder<>::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); Internal = Builder.CreateCall(TakeI32Func, I32Val); @@ -2309,11 +2293,9 @@ AllocaInst *PrivAI = nullptr; BasicBlock *EntryBB = nullptr; - BasicBlock *ExitBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { if (AllocaIP.isSet()) Builder.restoreIP(AllocaIP); else @@ -2328,7 +2310,6 @@ Builder.restoreIP(CodeGenIP); // collect some info for checks later - ExitBB = FiniBB.getUniqueSuccessor(); ThenBB = Builder.GetInsertBlock(); EntryBB = ThenBB->getUniquePredecessor(); @@ -2350,7 +2331,7 @@ BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB); + BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -2389,11 +2370,9 @@ AllocaInst *PrivAI = 
nullptr; BasicBlock *EntryBB = nullptr; - BasicBlock *ExitBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { if (AllocaIP.isSet()) Builder.restoreIP(AllocaIP); else @@ -2408,7 +2387,6 @@ Builder.restoreIP(CodeGenIP); // collect some info for checks later - ExitBB = FiniBB.getUniqueSuccessor(); ThenBB = Builder.GetInsertBlock(); EntryBB = ThenBB->getUniquePredecessor(); @@ -2432,7 +2410,7 @@ BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB); + BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -2470,18 +2448,11 @@ AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); - BasicBlock *EntryBB = nullptr; - - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { - // collect some info for checks later - EntryBB = FiniBB.getUniquePredecessor(); - + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { // actual start for bodyCB llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); - EXPECT_EQ(EntryBB, CodeGenIPBB); // body begin Builder.restoreIP(CodeGenIP); @@ -2495,13 +2466,11 @@ BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; + BasicBlock *EntryBB = Builder.GetInsertBlock(); Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, "testCRT", nullptr)); - Value *EntryBBTI = EntryBB->getTerminator(); - EXPECT_EQ(EntryBBTI, nullptr); - CallInst *CriticalEntryCI = nullptr; for (auto &EI : *EntryBB) { Instruction *cur = &EI; @@ -2720,16 +2689,10 @@ 
AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); - BasicBlock *EntryBB = nullptr; - - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { - EntryBB = FiniBB.getUniquePredecessor(); - + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); - EXPECT_EQ(EntryBB, CodeGenIPBB); Builder.restoreIP(CodeGenIP); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -2744,6 +2707,7 @@ }; // Test for "#omp ordered [threads]" + BasicBlock *EntryBB = Builder.GetInsertBlock(); Builder.restoreIP( OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true)); @@ -2796,16 +2760,10 @@ AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); - BasicBlock *EntryBB = nullptr; - - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { - EntryBB = FiniBB.getUniquePredecessor(); - + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); - EXPECT_EQ(EntryBB, CodeGenIPBB); Builder.restoreIP(CodeGenIP); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -2820,6 +2778,7 @@ }; // Test for "#omp ordered simd" + BasicBlock *EntryBB = Builder.GetInsertBlock(); Builder.restoreIP( OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false)); @@ -2903,11 +2862,9 @@ AllocaInst *PrivAI = nullptr; BasicBlock *EntryBB = nullptr; - BasicBlock *ExitBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { if 
(AllocaIP.isSet()) Builder.restoreIP(AllocaIP); else @@ -2922,7 +2879,6 @@ Builder.restoreIP(CodeGenIP); // collect some info for checks later - ExitBB = FiniBB.getUniqueSuccessor(); ThenBB = Builder.GetInsertBlock(); EntryBB = ThenBB->getUniquePredecessor(); @@ -2945,7 +2901,7 @@ BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB); + BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -2996,11 +2952,9 @@ AllocaInst *PrivAI = nullptr; BasicBlock *EntryBB = nullptr; - BasicBlock *ExitBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { if (AllocaIP.isSet()) Builder.restoreIP(AllocaIP); else @@ -3015,7 +2969,6 @@ Builder.restoreIP(CodeGenIP); // collect some info for checks later - ExitBB = FiniBB.getUniqueSuccessor(); ThenBB = Builder.GetInsertBlock(); EntryBB = ThenBB->getUniquePredecessor(); @@ -3038,7 +2991,7 @@ BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - EXPECT_EQ(ThenBB->getUniqueSuccessor(), ExitBB); + BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -3718,8 +3671,7 @@ // xor of thread-id; // and store the result in global variables. 
InsertPointTy BodyIP, BodyAllocaIP; - auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationBB) { + auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -3958,8 +3910,7 @@ InsertPointTy FirstBodyIP, FirstBodyAllocaIP; auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, - InsertPointTy CodeGenIP, - BasicBlock &ContinuationBB) { + InsertPointTy CodeGenIP) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -3979,8 +3930,7 @@ InsertPointTy SecondBodyIP, SecondBodyAllocaIP; auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, - InsertPointTy CodeGenIP, - BasicBlock &ContinuationBB) { + InsertPointTy CodeGenIP) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -4126,11 +4076,7 @@ llvm::SmallVector CaseBBs; auto FiniCB = [&](InsertPointTy IP) {}; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { - Builder.restoreIP(CodeGenIP); - Builder.CreateBr(&FiniBB); - }; + auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; SectionCBVector.push_back(SectionCB); auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -4158,8 +4104,6 @@ llvm::SmallVector CaseBBs; BasicBlock *SwitchBB = nullptr; - BasicBlock *ForExitBB = nullptr; - BasicBlock *ForIncBB = nullptr; AllocaInst *PrivAI = nullptr; SwitchInst *Switch = nullptr; @@ -4173,8 +4117,7 @@ EXPECT_NE(IPBB->end(), IP.getPoint()); }; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &FiniBB) { + auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumBodiesGenerated; CaseBBs.push_back(CodeGenIP.getBlock()); SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor(); @@ -4183,9 +4126,6 @@ Value *PrivLoad = Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca"); 
Builder.CreateICmpNE(F->arg_begin(), PrivLoad); - Builder.CreateBr(&FiniBB); - ForIncBB = - CodeGenIP.getBlock()->getSinglePredecessor()->getSingleSuccessor(); }; auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { @@ -4204,7 +4144,7 @@ // Switch BB's predecessor is loop condition BB, whose successor at index 1 is // loop's exit BB - ForExitBB = + BasicBlock *ForExitBB = SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1); EXPECT_NE(ForExitBB, nullptr); @@ -4213,7 +4153,6 @@ EXPECT_EQ(F, OutlinedFn); EXPECT_FALSE(verifyModule(*M, &errs())); EXPECT_EQ(OutlinedFn->arg_size(), 1U); - EXPECT_EQ(OutlinedFn->getBasicBlockList().size(), size_t(11)); BasicBlock *LoopPreheaderBB = OutlinedFn->getEntryBlock().getSingleSuccessor(); @@ -4254,13 +4193,10 @@ EXPECT_EQ(isa(SwitchBB->getTerminator()), true); Switch = cast(SwitchBB->getTerminator()); EXPECT_EQ(Switch->getNumCases(), 2U); - EXPECT_NE(ForIncBB, nullptr); - EXPECT_EQ(Switch->getSuccessor(0), ForIncBB); EXPECT_EQ(CaseBBs.size(), 2U); for (auto *&CaseBB : CaseBBs) { EXPECT_EQ(CaseBB->getParent(), OutlinedFn); - EXPECT_EQ(CaseBB->getSingleSuccessor(), ForExitBB); } ASSERT_EQ(NumBodiesGenerated, 2U); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -95,6 +95,8 @@ // also be used for alloca insertion which would result in insertion order // confusion. Create a new BasicBlock for the Builder and use the entry block // for the allocs. + // TODO: Create a dedicated alloca BasicBlock at function creation such that + // we do not need to move the current InsertPoint here.
if (builder.GetInsertBlock() == &builder.GetInsertBlock()->getParent()->getEntryBlock()) { assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && @@ -117,11 +119,14 @@ /// region, and a branch from any block with an successor-less OpenMP terminator /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes /// of the continuation block if provided. -static void convertOmpOpRegions( - Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock, - llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder, +static llvm::BasicBlock *convertOmpOpRegions( + Region &region, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, SmallVectorImpl *continuationBlockPHIs = nullptr) { + llvm::BasicBlock *continuationBlock = + splitBB(builder, true, "omp.region.cont"); + llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); + llvm::LLVMContext &llvmContext = builder.getContext(); for (Block &bb : region) { llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create( @@ -130,7 +135,7 @@ moduleTranslation.mapBlock(&bb, llvmBB); } - llvm::Instruction *sourceTerminator = sourceBlock.getTerminator(); + llvm::Instruction *sourceTerminator = sourceBlock->getTerminator(); // Terminators (namely YieldOp) may be forwarding values to the region that // need to be available in the continuation block.
Collect the types of these @@ -170,7 +175,7 @@ if (continuationBlockPHIs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); continuationBlockPHIs->reserve(continuationBlockPHITypes.size()); - builder.SetInsertPoint(&continuationBlock, continuationBlock.begin()); + builder.SetInsertPoint(continuationBlock, continuationBlock->begin()); for (llvm::Type *ty : continuationBlockPHITypes) continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields)); } @@ -186,7 +191,7 @@ if (bb->isEntryBlock()) { assert(sourceTerminator->getNumSuccessors() == 1 && "provided entry block has multiple successors"); - assert(sourceTerminator->getSuccessor(0) == &continuationBlock && + assert(sourceTerminator->getSuccessor(0) == continuationBlock && "ContinuationBlock is not the successor of the entry block"); sourceTerminator->setSuccessor(0, llvmBB); } @@ -195,7 +200,7 @@ if (failed( moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { bodyGenStatus = failure(); - return; + return continuationBlock; } // Special handling for `omp.yield` and `omp.terminator` (we may have more @@ -207,7 +212,7 @@ // in the same code that handles the region-owning operation. Operation *terminator = bb->getTerminator(); if (isa(terminator)) { - builder.CreateBr(&continuationBlock); + builder.CreateBr(continuationBlock); for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i) (*continuationBlockPHIs)[i]->addIncoming( @@ -223,6 +228,8 @@ // be converted several times, that is cloned, without clashes, and slightly // speeds up the lookups. moduleTranslation.forgetMapping(region); + + return continuationBlock; } /// Convert ProcBindKind from MLIR-generated enum to LLVM enum. @@ -249,16 +256,15 @@ // relying on captured variables. 
LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &continuationBlock) { + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. LLVM::ModuleTranslation::SaveStack frame( moduleTranslation, allocaIP); // ParallelOp has only one region associated with it. - convertOmpOpRegions(opInst.getRegion(), "omp.par.region", - *codeGenIP.getBlock(), continuationBlock, builder, + builder.restoreIP(codeGenIP); + convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder, moduleTranslation, bodyGenStatus); }; @@ -308,12 +314,11 @@ // relying on captured variables. LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &continuationBlock) { + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // MasterOp has only one region associated with it. auto &region = cast(opInst).getRegion(); - convertOmpOpRegions(region, "omp.master.region", *codeGenIP.getBlock(), - continuationBlock, builder, moduleTranslation, + builder.restoreIP(codeGenIP); + convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation, bodyGenStatus); }; @@ -337,13 +342,12 @@ // relying on captured variables. LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &continuationBlock) { + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // CriticalOp has only one region associated with it.
auto &region = cast(opInst).getRegion(); - convertOmpOpRegions(region, "omp.critical.region", *codeGenIP.getBlock(), - continuationBlock, builder, moduleTranslation, - bodyGenStatus); + builder.restoreIP(codeGenIP); + convertOmpOpRegions(region, "omp.critical.region", builder, + moduleTranslation, bodyGenStatus); }; // TODO: Perform finalization actions for variables. This has to be @@ -438,19 +442,10 @@ return success(); } - // Create the continuation block manually instead of calling splitBlock - // because the current insertion block may not have a terminator. - llvm::BasicBlock *continuationBlock = - llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont", - builder.GetInsertBlock()->getParent(), - builder.GetInsertBlock()->getNextNode()); - builder.CreateBr(continuationBlock); - LogicalResult bodyGenStatus = success(); SmallVector phis; - convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(), - *continuationBlock, builder, moduleTranslation, - bodyGenStatus, &phis); + llvm::BasicBlock *continuationBlock = convertOmpOpRegions( + region, blockName, builder, moduleTranslation, bodyGenStatus, &phis); if (failed(bodyGenStatus)) return failure(); if (continuationBlockArgs) @@ -578,13 +573,12 @@ // relying on captured variables. LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &continuationBlock) { + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // OrderedOp has only one region associated with it. auto &region = cast(opInst).getRegion(); - convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), - continuationBlock, builder, moduleTranslation, - bodyGenStatus); + builder.restoreIP(codeGenIP); + convertOmpOpRegions(region, "omp.ordered.region", builder, + moduleTranslation, bodyGenStatus); }; // TODO: Perform finalization actions for variables.
This has to be @@ -626,12 +620,10 @@ Region &region = sectionOp.region(); auto sectionCB = [&region, &builder, &moduleTranslation, &bodyGenStatus]( - InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::BasicBlock &finiBB) { + InsertPointTy allocaIP, InsertPointTy codeGenIP) { builder.restoreIP(codeGenIP); - builder.CreateBr(&finiBB); - convertOmpOpRegions(region, "omp.section.region", *codeGenIP.getBlock(), - finiBB, builder, moduleTranslation, bodyGenStatus); + convertOmpOpRegions(region, "omp.section.region", builder, + moduleTranslation, bodyGenStatus); }; sectionCBs.push_back(sectionCB); } @@ -674,10 +666,9 @@ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); LogicalResult bodyGenStatus = success(); - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP, - llvm::BasicBlock &continuationBB) { - convertOmpOpRegions(singleOp.region(), "omp.single.region", - *codegenIP.getBlock(), continuationBB, builder, + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + builder.restoreIP(codegenIP); + convertOmpOpRegions(singleOp.region(), "omp.single.region", builder, moduleTranslation, bodyGenStatus); }; auto finiCB = [&](InsertPointTy codeGenIP) {}; @@ -783,11 +774,9 @@ return; // Convert the body of the loop. - llvm::BasicBlock *entryBlock = ip.getBlock(); - llvm::BasicBlock *exitBlock = - entryBlock->splitBasicBlock(ip.getPoint(), "omp.wsloop.exit"); - convertOmpOpRegions(loop.region(), "omp.wsloop.region", *entryBlock, - *exitBlock, builder, moduleTranslation, bodyGenStatus); + builder.restoreIP(ip); + convertOmpOpRegions(loop.region(), "omp.wsloop.region", builder, + moduleTranslation, bodyGenStatus); }; // Delegate actual loop construction to the OpenMP IRBuilder. @@ -922,11 +911,9 @@ return; // Convert the body of the loop.
- llvm::BasicBlock *entryBlock = ip.getBlock(); - llvm::BasicBlock *exitBlock = - entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit"); - convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock, - *exitBlock, builder, moduleTranslation, bodyGenStatus); + builder.restoreIP(ip); + convertOmpOpRegions(loop.region(), "omp.simdloop.region", builder, + moduleTranslation, bodyGenStatus); }; // Delegate actual loop construction to the OpenMP IRBuilder. diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -1147,11 +1147,10 @@ omp.wsloop ordered(0) for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) { - // CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) - // CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]]) + // CHECK: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]]) omp.ordered_region { omp.terminator - // CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]]) + // CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB3]], i32 [[OMP_THREAD2]]) } omp.yield } @@ -1963,37 +1962,37 @@ // CHECK-NEXT: i32 1, label %[[SECTION2:.*]] // CHECK-NEXT: ] - // CHECK: [[INC]]: - // CHECK: %{{.*}} = add {{.*}}, 1 - // CHECK: br label %[[HEADER]] - - // CHECK: [[EXIT]]: - // CHECK: call void @__kmpc_for_static_fini({{.*}}) - // CHECK: call void @__kmpc_barrier({{.*}}) - // CHECK: br label %[[AFTER:.*]] - - // CHECK: [[AFTER]]: - // CHECK: br label %[[END:.*]] - - // CHECK: [[END]]: - // CHECK: ret void omp.sections { omp.section { // CHECK: [[SECTION1]]: - // CHECK-NEXT: br label %[[REGION1:[^ ,]*]] - // CHECK: [[REGION1]]: - // CHECK-NEXT: br label %[[EXIT]] + // CHECK-NEXT: br label %[[SECTION1_REGION1:[^ ,]*]] + // CHECK-EMPTY: + // CHECK-NEXT: 
[[SECTION1_REGION1]]: + // CHECK-NEXT: br label %[[SECTION1_REGION2:[^ ,]*]] + // CHECK-EMPTY: + // CHECK-NEXT: [[SECTION1_REGION2]]: + // CHECK-NEXT: br label %[[INC]] omp.terminator } omp.section { // CHECK: [[SECTION2]]: - // CHECK-NEXT: br label %[[REGION2:[^ ,]*]] - // CHECK: [[REGION2]]: - // CHECK-NEXT: br label %[[EXIT]] + // CHECK: br label %[[INC]] omp.terminator } omp.terminator } + + // CHECK: [[INC]]: + // CHECK: %{{.*}} = add {{.*}}, 1 + // CHECK: br label %[[HEADER]] + + // CHECK: [[EXIT]]: + // CHECK: call void @__kmpc_for_static_fini({{.*}}) + // CHECK: call void @__kmpc_barrier({{.*}}) + // CHECK: br label %[[AFTER:.*]] + + // CHECK: [[AFTER]]: + // CHECK: ret void llvm.return }