diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -170,6 +170,20 @@ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); + // Move instructions from the to-be-deleted ArtificialEntry to the entry + // basic block of the parallel region. CodeExtractor may have sunk + // allocas/bitcasts for values that are solely used in the outlined region + // and do not escape. + for (BasicBlock::iterator It = ArtificialEntry.begin(); + It != ArtificialEntry.end();) { + Instruction &I = *It; + It++; + + if (I.isTerminator()) + continue; + + I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); + } OI.EntryBB->moveBefore(&ArtificialEntry); ArtificialEntry.eraseFromParent(); } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -28,6 +28,7 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; using namespace omp; @@ -602,15 +603,14 @@ if (!RFI.Declaration) return false; - // Check if there any __kmpc_push_proc_bind calls for explicit affinities. - OMPInformationCache::RuntimeFunctionInfo &ProcBindRFI = - OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind]; - - // Defensively abort if explicit affinities are set. - // TODO: Track ICV proc_bind to merge when mergable regions have the same - // affinity. - if (ProcBindRFI.Declaration) - return false; + // Array of structs of runtime functions that prevent merging. 
+ struct { + RuntimeFunction RF; + bool skipNext; + } UnmergableCallInfo[] = {{OMPRTL_omp_in_parallel, false}, + {OMPRTL_omp_set_num_threads, false}, + {OMPRTL___kmpc_push_proc_bind, true}, + {OMPRTL___kmpc_push_num_threads, true}}; bool Changed = false; LoopInfo *LI = nullptr; @@ -638,6 +638,109 @@ auto FiniCB = [&](InsertPointTy CodeGenIP) {}; + auto CreateSequentialRegion = [&](Function *OuterFn, Instruction *SeqStartI, + Instruction *SeqEndI) { + // Isolate the instructions of the sequential region to a separate + // block. + BasicBlock *ParentBB = SeqStartI->getParent(); + BasicBlock *SeqEndBB = + SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); + BasicBlock *SeqAfterBB = + SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); + BasicBlock *SeqStartBB = + SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); + + assert(ParentBB->getUniqueSuccessor() == SeqStartBB && + "Expected a different CFG"); + const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); + ParentBB->getTerminator()->eraseFromParent(); + OpenMPIRBuilder::LocationDescription Loc( + InsertPointTy(ParentBB, ParentBB->end()), DL); + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + BasicBlock *CGStartBB = CodeGenIP.getBlock(); + BasicBlock *CGEndBB = + SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); + assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); + CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); + assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); + SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); + }; + auto FiniCB = [&](InsertPointTy CodeGenIP) {}; + + InsertPointTy SeqAfterIP = + OMPInfoCache.OMPBuilder.CreateMaster(Loc, BodyGenCB, FiniCB); + + OMPInfoCache.OMPBuilder.CreateBarrier(SeqAfterIP, OMPD_parallel); + + // Find outputs from the sequential region to broadcast to users outside + // the soon-to-be-merged region. 
Inputs and sinking/hoisting candidates + // do not require any action, they will be handled by OMPIRBuilder when + // extracting the merged outlined region. + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor({SeqStartBB}, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".seq"); + + BasicBlock *CommonExit = nullptr; + SetVector Inputs, Outputs, SinkingCands, HoistingCands; + Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + for (Value *V : Outputs) { + SmallPtrSet EscapingUsers; + SmallPtrSet EscapingBBs; + SmallDenseMap EscapingBB2LI; + + // Find all escaping users, outside the sequential region, and their + // parent BBs. + for (User *U : V->users()) { + Instruction *UI = dyn_cast(U); + if (!UI) + continue; + + if (UI->getParent() == SeqStartBB) + continue; + + EscapingUsers.insert(UI); + EscapingBBs.insert(UI->getParent()); + } + + // Emit an alloca in the outer region to store the broadcasted value. + const DataLayout &DL = M.getDataLayout(); + AllocaInst *AllocaI = new AllocaInst( + V->getType(), DL.getAllocaAddrSpace(), nullptr, + V->getName() + ".seq.output.alloc", &OuterFn->front().front()); + + new StoreInst(V, AllocaI, SeqStartBB->getTerminator()); + + // Emit the load instruction in escaping BBs. + for (BasicBlock *BBI : EscapingBBs) { + LoadInst *LoadI = + new LoadInst(V->getType(), AllocaI, + V->getName() + ".seq.output.load", &BBI->front()); + EscapingBB2LI[BBI] = LoadI; + } + + // Replace uses of the escaping value with the load + // instruction in this basic block. 
+ for (Instruction *UI : EscapingUsers) { + LoadInst *LoadI = EscapingBB2LI[UI->getParent()]; + UI->replaceUsesOfWith(V, LoadI); + } + } + + BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); + + LLVM_DEBUG(dbgs() << "After sequential inlining " << *OuterFn << "\n"); + }; + // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all // contained in BB and only separated by instructions that can be // redundantly executed in parallel. The block BB is split before the first @@ -683,6 +786,20 @@ const DebugLoc DL = BB->getTerminator()->getDebugLoc(); BB->getTerminator()->eraseFromParent(); + // Create sequential regions for sequential instructions that are + // in-between mergable parallel regions. + for (auto It = MergableCIs.begin(); It != MergableCIs.end() - 1; ++It) { + Instruction *ForkCI = *It; + Instruction *NextForkCI = *(It + 1); + + // Continue if there are not in-between instructions. + if (ForkCI->getNextNode() == NextForkCI) + continue; + + CreateSequentialRegion(OriginalFn, ForkCI->getNextNode(), + NextForkCI->getPrevNode()); + } + OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), DL); IRBuilder<>::InsertPoint AllocaIP( @@ -783,16 +900,55 @@ BasicBlock *BB = It.getFirst(); SmallVector MergableCIs; + struct UnmergableInfo { + bool isUnmergable; + bool skipNext; + }; + auto isUnmergable = [&](Instruction & I) -> struct UnmergableInfo { + if (isa(&I)) { + CallInst &CI = cast(I); + for (auto UI : UnmergableCallInfo) { + OMPInformationCache::RuntimeFunctionInfo &RFI = + OMPInfoCache.RFIs[UI.RF]; + + if (CI.getCalledFunction() == RFI.Declaration) { + LLVM_DEBUG(dbgs() << "Unmergable calling " << RFI.Name + << " skipNext " << UI.skipNext << "\n"); + return {true, UI.skipNext}; + } + } + } + + return {false, false}; + }; // Find maximal number of parallel region CIs that are safe to merge. 
- for (Instruction &I : *BB) { + for (auto It = BB->begin(); It != BB->end(); It++) { + Instruction &I = *It; if (CIs.count(&I)) { MergableCIs.push_back(cast(&I)); continue; } - if (isSafeToSpeculativelyExecute(&I, &I, DT)) + auto UI = isUnmergable(I); + + // Continue adding parallel region CIs if the instruction + // is not unmergable and it is not the terminator. + if (!UI.isUnmergable && !I.isTerminator()) continue; + // Forward the instruction iterator to skip the next parallel region + // if there is an unmergable call that affects it. + if (UI.skipNext) { + Instruction *SkipI = nullptr; + do { + It++; + SkipI = &*It; + } while (!CIs.count(SkipI)); + LLVM_DEBUG(dbgs() << "Skip parallel region " << *SkipI << "due to " + << I << "\n"); + } + + // Store mergable regions found so far. if (MergableCIs.size() > 1) { MergableCIsVector.push_back(MergableCIs); LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size() @@ -817,11 +973,22 @@ RFI.clearUsesMap(); OMPInfoCache.collectUses(RFI, /* CollectStats */ false); - // Collect uses for the emitted barrier call. + // Collect uses for emitted barrier calls. OMPInformationCache::RuntimeFunctionInfo &BarrierRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_barrier]; BarrierRFI.clearUsesMap(); OMPInfoCache.collectUses(BarrierRFI, /* CollectStats */ false); + + // Collect uses for any emitted master/end_master calls. 
+      OMPInformationCache::RuntimeFunctionInfo &MasterRFI =
+          OMPInfoCache.RFIs[OMPRTL___kmpc_master];
+      MasterRFI.clearUsesMap();
+      OMPInfoCache.collectUses(MasterRFI, /* CollectStats */ false);
+
+      OMPInformationCache::RuntimeFunctionInfo &EndMasterRFI =
+          OMPInfoCache.RFIs[OMPRTL___kmpc_end_master];
+      EndMasterRFI.clearUsesMap();
+      OMPInfoCache.collectUses(EndMasterRFI, /* CollectStats */ false);
     }
 
     return Changed;
diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -1,6 +1,421 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
 ; RUN: opt -S -passes='attributor,cgscc(openmpopt)' -openmp-opt-enable-merging < %s | FileCheck %s
-
+; #include <omp.h>
+; int def();
+; void use(int);
+; void use_ptr(int *);
+; void merge() {
+;   int a = def();
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+; }
+; void unmergable_omp_in_parallel() {
+;   int a = def();
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+;   use(omp_in_parallel());
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+; }
+; void unmergable_omp_set_num_threads() {
+;   int a = def();
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+;   omp_set_num_threads(def());
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+; }
+; void unmergable_proc_bind() {
+;   int a = def();
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+;   #pragma omp parallel proc_bind(close)
+;   {
+;     use(a);
+;   }
+; }
+; void unmergable_num_threads() {
+;   int a = def();
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+;   #pragma omp parallel num_threads(def())
+;   {
+;     use(a);
+;   }
+; }
+; void merge_seq_use() {
+;   int a = def();
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+;   use(a);
+;   #pragma omp parallel
+;   {
+;     use(a);
+;   }
+; }
+; void merge_seq_def_a() {
+;   int a =
def(); +; #pragma omp parallel +; { +; use(a); +; } +; a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(a); +; } +; void merge_seq_def_b() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; int b = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(b); +; } +; void merge_seq_def_firstprivate() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; a = def(); +; #pragma omp parallel firstprivate(a) +; { +; use(a); +; } +; use(a); +; } +; void merge_seq_sink_lt() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; { +; int b = def(); +; use_ptr(&b); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_seq_above() { +; int a = def(); +; { +; #pragma omp parallel +; { +; use(a); +; } +; int b = def(); +; use_ptr(&b); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_seq_scope_below() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; { +; int b = def(); +; use_ptr(&b); +; #pragma omp parallel +; { +; use(a); +; } +; } +; } +; void merge_seq_scope_below_use() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; { +; int b = def(); +; use_ptr(&b); +; #pragma omp parallel +; { +; use(a); +; } +; use(b); +; } +; } +; void merge_seq_scope_par_use() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; int b = def(); +; #pragma omp parallel +; { +; use(a); +; use(b); +; } +; } +; void merge_seq_use_arg(int arg) { +; #pragma omp parallel +; { +; use(arg); +; } +; use(arg); +; #pragma omp parallel +; { +; use(arg); +; } +; } +; void merge_cancellable_regions(int cancel1, int cancel2) +; { +; #pragma omp parallel +; { +; if(cancel1) { +; #pragma omp cancel parallel +; } +; } +; #pragma omp parallel +; { +; if (cancel2) { +; #pragma omp cancel parallel +; } +; } +; } +; void merge_cancellable_regions_seq(int cancel1, int cancel2) +; { +; #pragma omp parallel +; { +; if(cancel1) { +; #pragma omp cancel parallel +; } +; } +; cancel2 = 
def(); +; #pragma omp parallel +; { +; if (cancel2) { +; #pragma omp cancel parallel +; } +; } +; } +; void merge_3() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_omp_in_parallel() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_omp_set_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_proc_bind() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel num_threads(def()) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_omp_in_parallel() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_2_unmergable_1_omp_in_parallel() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_omp_set_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_2_unmergable_1_omp_set_num_threads() { +; int a = 
def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_num_threads() { +; int a = def(); +; #pragma omp parallel num_threads(def()) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_proc_bind() { +; int a = def(); +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_2_unmergable_1_proc_bind() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; } target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.ident_t = type { i32, i32, i32, i32, i8* } @@ -8,219 +423,2599 @@ @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -; void merge_all() { -; int a = 1; -; #pragma omp parallel -; { -; a = 2; -; } -; #pragma omp parallel -; { -; a = 3; -; } -; } +define dso_local void @merge() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare dso_local i32 @def(...) local_unnamed_addr + +define internal void @.omp_outlined.(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local void @use(i32) local_unnamed_addr + +declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr + +define internal void @.omp_outlined..1(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define dso_local void @unmergable_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..2(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local i32 @omp_in_parallel() local_unnamed_addr + +define internal void @.omp_outlined..3(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..4(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local void @omp_set_num_threads(i32) local_unnamed_addr + +define internal void @.omp_outlined..5(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..6(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..7(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr + +declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) local_unnamed_addr + +define dso_local void @unmergable_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..8(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..9(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) local_unnamed_addr + +define dso_local void @merge_seq_use() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = load i32, i32* %a, align 4 + call void @use(i32 %1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..10(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..11(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_def_a() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = load i32, i32* %a, align 4 + call void @use(i32 %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..12(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..13(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_def_b() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @use(i32 %call1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..14(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..15(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_def_firstprivate() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %a, align 4 + %a.casted.sroa.0.0.insert.ext = zext i32 %call1 to i64 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 %a.casted.sroa.0.0.insert.ext) + %1 = load i32, i32* %a, align 4 + call void @use(i32 %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..16(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..17(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %a) { +entry: + %a.addr.sroa.0.0.extract.trunc = trunc i64 %a to i32 + call void @use(i32 %a.addr.sroa.0.0.extract.trunc) + ret void +} + +define dso_local void @merge_seq_sink_lt() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..18(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local void @use_ptr(i32*) local_unnamed_addr + +define internal void @.omp_outlined..19(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_above() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..20(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..21(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_scope_below() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..22(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..23(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_scope_below_use() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), i32* nonnull %a) + %2 = load i32, i32* %b, align 4 + call void @use(i32 %2) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..24(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..25(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_scope_par_use() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32* nonnull %a, i32* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..26(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..27(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a, i32* nocapture nonnull readonly align 4 dereferenceable(4) %b) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + %1 = load i32, i32* %b, align 4 + call void @use(i32 %1) + ret void +} + +define dso_local void @merge_seq_use_arg(i32 %arg) local_unnamed_addr { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nonnull %arg.addr) + %0 = load i32, i32* %arg.addr, align 4 + call void @use(i32 %0) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nonnull %arg.addr) + ret void +} + +define internal void @.omp_outlined..28(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %arg) { +entry: + %0 = load i32, i32* %arg, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..29(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %arg) { +entry: + %0 = load i32, i32* %arg, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_cancellable_regions(i32 %cancel1, i32 %cancel2) local_unnamed_addr { +entry: + %cancel1.addr = alloca i32, align 4 + %cancel2.addr = alloca i32, align 4 + store i32 %cancel1, i32* %cancel1.addr, align 4 + store i32 %cancel2, i32* %cancel2.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr) + ret void +} + +define internal void @.omp_outlined..30(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { +entry: + %0 = load i32, i32* %cancel1, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) local_unnamed_addr + +define internal void @.omp_outlined..31(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { +entry: + %0 = load i32, i32* %cancel2, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +define dso_local void @merge_cancellable_regions_seq(i32 %cancel1, i32 %cancel2) local_unnamed_addr { +entry: + %cancel1.addr = alloca i32, align 4 + %cancel2.addr = alloca i32, align 4 + store i32 %cancel1, i32* %cancel1.addr, align 4 + store i32 %cancel2, i32* %cancel2.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr) + %call = call i32 (...) 
@def() + store i32 %call, i32* %cancel2.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr) + ret void +} + +define internal void @.omp_outlined..32(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { +entry: + %0 = load i32, i32* %cancel1, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +define internal void @.omp_outlined..33(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { +entry: + %0 = load i32, i32* %cancel2, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +define dso_local void @merge_3() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..34(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..35(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..36(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..37 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call2 = call i32 @omp_in_parallel() + call void @use(i32 %call2) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..37(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..38(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..39(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..40 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) 
@def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..41 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call2 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call2) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..40(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..41(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..42(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..44 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..45 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..43(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..44(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..45(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) 
@def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..48 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..46(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..47(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..48(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_1_merge_2_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..49 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..50 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..49(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..50(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..51(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_2_unmergable_1_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..52 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..53 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..54 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..52(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..53(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..54(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_1_merge_2_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..55 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..56 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..57 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..55(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..56(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..57(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_2_unmergable_1_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + 
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..58 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..59 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..60 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..58(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..59(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..60(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_1_merge_2_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 
@__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + %call1 = call i32 (...) @def() + call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..61 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..62 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..63 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..61(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..62(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..63(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define 
dso_local void @unmergable_1_merge_2_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..64 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..65 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..66 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..64(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..65(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..66(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = 
load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_2_unmergable_1_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..67 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..68 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..69 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..67(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..68(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..69(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!2} +!2 = !{i64 2, i64 -1, i64 -1, i1 true} +; CHECK-LABEL: define {{[^@]+}}@merge() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0:#.*]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label 
[[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: call void @use(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull 
align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly 
align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_use..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_a() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_def_a..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_a..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] 
+; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[A]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_b() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_def_b..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[CALL1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[CALL1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[CALL1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: call void @use(i32 [[CALL1_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_b..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[CALL1_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[CALL1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_firstprivate() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_def_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_firstprivate..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[A]], align 4 +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[CALL1]] to i64 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_above() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_above..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_above..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_scope_below..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull 
readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below_use() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_scope_below_use..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; 
CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_par_use() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_scope_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_par_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef 
nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use_arg +; CHECK-SAME: (i32 [[ARG:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_use_arg..omp_par to void (i32*, i32*, ...)*), i32* [[ARG_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use_arg..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARG_ADDR:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..28(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..29(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; 
CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..30(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..31(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, 
i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..32(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: 
omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..33(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_3() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..34(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..35(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..36(i32* 
[[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @unmergable_3_omp_in_parallel..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32 [[CALL2]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_omp_in_parallel..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32 [[CALL2:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK: omp_region.end4: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split.split: +; CHECK-NEXT: call void 
@.omp_outlined..39(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body5: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK: seq.par.merged2: +; CHECK-NEXT: call void @use(i32 [[CALL2]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK: omp_region.body5.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: br label [[OMP_REGION_END4]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: call void @use(i32 [[CALL2]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] ; -; Merge all parallel regions. -define dso_local void @merge_all() local_unnamed_addr { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 1, i32* %1, align 4 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_all..omp_par to void (i32*, i32*, ...)*), i32* nonnull %1) - %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_all..omp_par.1 to void (i32*, i32*, ...)*), i32* nonnull %1) - ret void -} - -define internal void @merge_all..omp_par.1(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 3, i32* %2, align 4 - ret void -} - -define internal void @merge_all..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 2, i32* %2, align 4 - ret void -} - - -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr - -declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr - -; void merge_none() { -; int a = 1; -; #pragma omp parallel -; { -; a = 2; -; } -; a = 3; -; #pragma omp parallel -; { -; a = 4; -; } -; } -; -; Does not merge parallel regions, in-between store -; instruction is unsafe to execute in parallel. -define dso_local void @merge_none() local_unnamed_addr { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 1, i32* %1, align 4 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par to void (i32*, i32*, ...)*), i32* nonnull %1) - store i32 3, i32* %1, align 4 - %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par.2 to void (i32*, i32*, ...)*), i32* nonnull %1) - ret void -} - -define internal void @merge_none..omp_par.2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 4, i32* %2, align 4 - ret void -} - -define internal void @merge_none..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 2, i32* %2, align 4 - ret void -} - -; void merge_some() { -; int a = 1; -; #pragma omp parallel -; { -; a = 2; -; } -; a = 3; -; #pragma omp parallel -; { -; a = 4; -; } -; #pragma omp parallel -; { -; a = 5; -; } -; } ; -; Do not merge first parallel region, due to the -; unsafe store, but merge the two next parallel -; regions. -define dso_local void @merge_some() local_unnamed_addr { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 1, i32* %1, align 4 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par to void (i32*, i32*, ...)*), i32* nonnull %1) - store i32 3, i32* %1, align 4 - %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par.3 to void (i32*, i32*, ...)*), i32* nonnull %1) - %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par.4 to void (i32*, i32*, ...)*), i32* nonnull %1) - ret void -} - -define internal void @merge_some..omp_par.4(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 5, i32* %2, align 4 - ret void -} - -define internal void @merge_some..omp_par.3(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 4, i32* %2, align 4 - ret void -} - -define internal void @merge_some..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 2, i32* %2, align 4 - ret void -} - -; void merge_cancellable_regions(int cancel1, int cancel2) -; { -; #pragma omp parallel -; { -; if(cancel1) { -; #pragma omp cancel parallel -; } -; } -; #pragma omp parallel -; { -; if (cancel2) { -; #pragma omp cancel parallel -; } -; } -; } +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void ; -; Merge correctly cancellable regions. -define dso_local void @merge_cancellable_regions(i32 %0, i32 %1) local_unnamed_addr { - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - store i32 %0, i32* %3, align 4 - store i32 %1, i32* %4, align 4 - %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* nonnull %3) - %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_cancellable_regions..omp_par.5 to void (i32*, i32*, ...)*), i32* nonnull %4) - ret void -} - -define internal void @merge_cancellable_regions..omp_par.5(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture readonly %2) { - %4 = load i32, i32* %2, align 4 - %5 = icmp eq i32 %4, 0 - br i1 %5, label %6, label %7 - -6: ; preds = %3 - ret void - -7: ; preds = %3 - %8 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - %9 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %8, i32 1) - ret void -} - -define internal void @merge_cancellable_regions..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture readonly %2) { - %4 = load i32, i32* %2, align 4 - %5 = icmp eq i32 %4, 0 - br i1 %5, label %6, label %7 - -6: ; preds = %3 - ret void - -7: ; preds = %3 - %8 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - %9 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %8, i32 1) - ret void -} - -declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) local_unnamed_addr - - -!llvm.module.flags = !{!0} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{!2} -!2 = !{i64 2, i64 -1, i64 -1, i1 true} -; CHECK-LABEL: define {{[^@]+}}@merge_all() local_unnamed_addr { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 +; +; CHECK-LABEL: define 
{{[^@]+}}@.omp_outlined..38 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..40 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..41 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL2]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..40 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..41 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..42 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 
dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..44 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..45 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..43 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..44 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..45 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..48 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..46 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..47 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..48 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef 
nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..49 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: call void @use(i32 [[CALL1]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_all..omp_par.2 to void (i32*, i32*, ...)*), i32* [[TMP2]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_omp_in_parallel..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: -; CHECK-NEXT: br label [[DOTSPLIT_SPLIT:%.*]] -; CHECK: .split.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_all..omp_par.2 -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[TMP0:%.*]]) [[ATTR0:#.*]] { +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_in_parallel..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: @@ -228,12 +3023,86 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @merge_all..omp_par(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) +; CHECK-NEXT: call void @.omp_outlined..50(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @merge_all..omp_par.1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) -; CHECK-NEXT: br label [[DOTSPLIT:%.*]] -; CHECK: .split: +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..51(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..49 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..50 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..51 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone 
[[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1_omp_in_parallel..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: call void @use(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..54 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_in_parallel..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..52(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..53(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK: omp.par.region.split: ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] @@ -241,65 +3110,134 @@ ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_all..omp_par.1 -; CHECK-SAME: 
(i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1:#.*]] { -; CHECK-NEXT: store i32 3, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..52 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..53 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..54 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..55 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_omp_set_num_threads..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_all..omp_par -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 2, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_set_num_threads..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..56(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..57(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_none() local_unnamed_addr { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) -; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2]]) -; CHECK-NEXT: store i32 3, i32* [[TMP2]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par.2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2]]) +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..55 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_none..omp_par.2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 4, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..56 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_none..omp_par -; CHECK-SAME: (i32* 
noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 2, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..57 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some() local_unnamed_addr { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) -; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2]]) -; CHECK-NEXT: store i32 3, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par.5 to void (i32*, i32*, ...)*), i32* [[TMP2]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1_omp_set_num_threads..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: -; CHECK-NEXT: br label [[DOTSPLIT_SPLIT:%.*]] -; CHECK: .split.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..60 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par.5 -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[TMP0:%.*]]) [[ATTR0]] { +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_set_num_threads..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: @@ -307,12 +3245,12 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @merge_some..omp_par.3(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) +; CHECK-NEXT: call void @.omp_outlined..58(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @merge_some..omp_par.4(i32* 
[[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) -; CHECK-NEXT: br label [[DOTSPLIT:%.*]] -; CHECK: .split: +; CHECK-NEXT: call void @.omp_outlined..59(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK: omp.par.region.split: ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] @@ -320,50 +3258,61 @@ ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par.4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 5, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..58 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par.3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 4, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..59 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 2, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..60 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions -; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) -; CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..61 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par.6 to void (i32*, i32*, ...)*), i32* [[TMP4]], i32* [[TMP5]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_num_threads..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: -; CHECK-NEXT: br label [[DOTSPLIT_SPLIT:%.*]] -; CHECK: .split.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par.6 -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) [[ATTR0]] { +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_num_threads..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: @@ -371,12 +3320,12 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @merge_cancellable_regions..omp_par(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP0]]) +; CHECK-NEXT: call void @.omp_outlined..62(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @merge_cancellable_regions..omp_par.5(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: br label [[DOTSPLIT:%.*]] -; CHECK: .split: +; CHECK-NEXT: call void @.omp_outlined..63(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK: omp.par.region.split: ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] @@ -384,28 +3333,174 @@ ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par.5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP2:%.*]]) { -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; CHECK: 6: +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..61 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB1]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], 
i32 [[TMP8]], i32 noundef 1) +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..62 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP2:%.*]]) { -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; CHECK: 6: -; CHECK-NEXT: ret void -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB1]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP8]], i32 noundef 1) +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..63 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: 
call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..64 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_proc_bind..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_proc_bind..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load 
i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..65(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..66(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..64 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..65 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..66 +; 
CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1_proc_bind..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..69 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_proc_bind..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..67(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..68(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..67 +; 
CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..68 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..69 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging_legacy_pm.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging_legacy_pm.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging_legacy_pm.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging_legacy_pm.ll @@ -1,6 +1,421 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs ; RUN: opt -S -attributor -openmpopt -openmp-opt-enable-merging < %s | FileCheck %s - +; #include +; int def(); +; void use(int); +; void use_ptr(int *); +; void merge() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_omp_in_parallel() { +; int a = 
def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_omp_set_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_proc_bind() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; } +; void unmergable_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel num_threads(def()) +; { +; use(a); +; } +; } +; void merge_seq_use() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(a); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_seq_def_a() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(a); +; } +; void merge_seq_def_b() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; int b = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(b); +; } +; void merge_seq_def_firstprivate() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; a = def(); +; #pragma omp parallel firstprivate(a) +; { +; use(a); +; } +; use(a); +; } +; void merge_seq_sink_lt() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; { +; int b = def(); +; use_ptr(&b); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_seq_above() { +; int a = def(); +; { +; #pragma omp parallel +; { +; use(a); +; } +; int b = def(); +; use_ptr(&b); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_seq_scope_below() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; { +; int b = def(); +; use_ptr(&b); +; #pragma omp parallel +; { +; use(a); +; } +; } +; } +; void merge_seq_scope_below_use() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; { +; 
int b = def(); +; use_ptr(&b); +; #pragma omp parallel +; { +; use(a); +; } +; use(b); +; } +; } +; void merge_seq_scope_par_use() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; int b = def(); +; #pragma omp parallel +; { +; use(a); +; use(b); +; } +; } +; void merge_seq_use_arg(int arg) { +; #pragma omp parallel +; { +; use(arg); +; } +; use(arg); +; #pragma omp parallel +; { +; use(arg); +; } +; } +; void merge_cancellable_regions(int cancel1, int cancel2) +; { +; #pragma omp parallel +; { +; if(cancel1) { +; #pragma omp cancel parallel +; } +; } +; #pragma omp parallel +; { +; if (cancel2) { +; #pragma omp cancel parallel +; } +; } +; } +; void merge_cancellable_regions_seq(int cancel1, int cancel2) +; { +; #pragma omp parallel +; { +; if(cancel1) { +; #pragma omp cancel parallel +; } +; } +; cancel2 = def(); +; #pragma omp parallel +; { +; if (cancel2) { +; #pragma omp cancel parallel +; } +; } +; } +; void merge_3() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_omp_in_parallel() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_omp_set_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_proc_bind() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_3_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel num_threads(def()) +; { +; 
use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_omp_in_parallel() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_2_unmergable_1_omp_in_parallel() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; use(omp_in_parallel()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_omp_set_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_2_unmergable_1_omp_set_num_threads() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; omp_set_num_threads(def()); +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_num_threads() { +; int a = def(); +; #pragma omp parallel num_threads(def()) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void unmergable_1_merge_2_proc_bind() { +; int a = def(); +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; } +; void merge_2_unmergable_1_proc_bind() { +; int a = def(); +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel +; { +; use(a); +; } +; #pragma omp parallel proc_bind(close) +; { +; use(a); +; } +; } target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.ident_t = type { i32, i32, i32, i32, i8* } @@ -8,220 +423,2599 @@ @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, 
i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -; void merge_all() { -; int a = 1; -; #pragma omp parallel -; { -; a = 2; -; } -; #pragma omp parallel -; { -; a = 3; -; } -; } +define dso_local void @merge() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare dso_local i32 @def(...) local_unnamed_addr + +define internal void @.omp_outlined.(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local void @use(i32) local_unnamed_addr + +declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
local_unnamed_addr + +define internal void @.omp_outlined..1(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define dso_local void @unmergable_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..2(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local i32 @omp_in_parallel() local_unnamed_addr + +define internal void @.omp_outlined..3(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..4(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local void @omp_set_num_threads(i32) local_unnamed_addr + +define internal void @.omp_outlined..5(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..6(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..7(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr + +declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) local_unnamed_addr + +define dso_local void @unmergable_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..8(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..9(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) local_unnamed_addr + +define dso_local void @merge_seq_use() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = load i32, i32* %a, align 4 + call void @use(i32 %1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..10(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..11(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_def_a() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = load i32, i32* %a, align 4 + call void @use(i32 %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..12(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..13(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_def_b() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @use(i32 %call1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..14(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..15(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_def_firstprivate() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %a, align 4 + %a.casted.sroa.0.0.insert.ext = zext i32 %call1 to i64 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 %a.casted.sroa.0.0.insert.ext) + %1 = load i32, i32* %a, align 4 + call void @use(i32 %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..16(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..17(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %a) { +entry: + %a.addr.sroa.0.0.extract.trunc = trunc i64 %a to i32 + call void @use(i32 %a.addr.sroa.0.0.extract.trunc) + ret void +} + +define dso_local void @merge_seq_sink_lt() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..18(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +declare dso_local void @use_ptr(i32*) local_unnamed_addr + +define internal void @.omp_outlined..19(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_above() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..20(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..21(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_scope_below() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..22(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..23(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_scope_below_use() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void @use_ptr(i32* nonnull %b) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), i32* nonnull %a) + %2 = load i32, i32* %b, align 4 + call void @use(i32 %2) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..24(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..25(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_seq_scope_par_use() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* nonnull %a) + %1 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call1 = call i32 (...) @def() + store i32 %call1, i32* %b, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32* nonnull %a, i32* nonnull %b) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..26(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..27(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a, i32* nocapture nonnull readonly align 4 dereferenceable(4) %b) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + %1 = load i32, i32* %b, align 4 + call void @use(i32 %1) + ret void +} + +define dso_local void @merge_seq_use_arg(i32 %arg) local_unnamed_addr { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nonnull %arg.addr) + %0 = load i32, i32* %arg.addr, align 4 + call void @use(i32 %0) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nonnull %arg.addr) + ret void +} + +define internal void @.omp_outlined..28(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %arg) { +entry: + %0 = load i32, i32* %arg, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..29(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %arg) { +entry: + %0 = load i32, i32* %arg, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_cancellable_regions(i32 %cancel1, i32 %cancel2) local_unnamed_addr { +entry: + %cancel1.addr = alloca i32, align 4 + %cancel2.addr = alloca i32, align 4 + store i32 %cancel1, i32* %cancel1.addr, align 4 + store i32 %cancel2, i32* %cancel2.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr) + ret void +} + +define internal void @.omp_outlined..30(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { +entry: + %0 = load i32, i32* %cancel1, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) local_unnamed_addr + +define internal void @.omp_outlined..31(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { +entry: + %0 = load i32, i32* %cancel2, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +define dso_local void @merge_cancellable_regions_seq(i32 %cancel1, i32 %cancel2) local_unnamed_addr { +entry: + %cancel1.addr = alloca i32, align 4 + %cancel2.addr = alloca i32, align 4 + store i32 %cancel1, i32* %cancel1.addr, align 4 + store i32 %cancel2, i32* %cancel2.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr) + %call = call i32 (...) 
@def() + store i32 %call, i32* %cancel2.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr) + ret void +} + +define internal void @.omp_outlined..32(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { +entry: + %0 = load i32, i32* %cancel1, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +define internal void @.omp_outlined..33(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { +entry: + %0 = load i32, i32* %cancel2, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load i32, i32* %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + ret void + +if.end: ; preds = %entry + ret void +} + +define dso_local void @merge_3() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..34(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..35(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..36(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..37 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call2 = call i32 @omp_in_parallel() + call void @use(i32 %call2) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..37(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..38(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..39(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..40 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) 
@def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..41 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call2 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call2) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..40(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..41(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..42(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..44 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..45 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..43(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..44(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..45(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_3_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) 
@def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..48 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..46(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..47(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..48(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_1_merge_2_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..49 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..50 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..49(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..50(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..51(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_2_unmergable_1_omp_in_parallel() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..52 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..53 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 @omp_in_parallel() + call void @use(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..54 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..52(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..53(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..54(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_1_merge_2_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..55 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..56 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..57 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..55(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..56(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..57(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_2_unmergable_1_omp_set_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + 
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..58 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..59 to void (i32*, i32*, ...)*), i32* nonnull %a) + %call1 = call i32 (...) @def() + call void @omp_set_num_threads(i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..60 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret void +} + +define internal void @.omp_outlined..58(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..59(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..60(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @unmergable_1_merge_2_num_threads() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 
@__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + %call1 = call i32 (...) @def() + call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %call1) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..61 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..62 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..63 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..61(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..62(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..63(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define 
dso_local void @unmergable_1_merge_2_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..64 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..65 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..66 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..64(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..65(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..66(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = 
load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define dso_local void @merge_2_unmergable_1_proc_bind() local_unnamed_addr { +entry: + %a = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) + %call = call i32 (...) @def() + store i32 %call, i32* %a, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..67 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..68 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..69 to void (i32*, i32*, ...)*), i32* nonnull %a) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + ret void +} + +define internal void @.omp_outlined..67(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..68(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + +define internal void @.omp_outlined..69(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +entry: + %0 = load i32, i32* %a, align 4 + call void @use(i32 %0) + ret void +} + + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!2} +!2 = !{i64 2, i64 -1, i64 -1, i1 true} +; CHECK-LABEL: define {{[^@]+}}@merge() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0:#.*]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label 
[[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: call void @use(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull 
align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly 
align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_use..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_a() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_def_a..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_a..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] 
+; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[A]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_b() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_def_b..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[CALL1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[CALL1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[CALL1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: call void @use(i32 [[CALL1_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_b..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[CALL1_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[CALL1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_firstprivate() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_def_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_def_firstprivate..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[A]], align 4 +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[CALL1]] to i64 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_above() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_above..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_above..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_scope_below..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull 
readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below_use() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_scope_below_use..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_below_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; 
CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: call void @use_ptr(i32* noundef nonnull [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_par_use() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_scope_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_scope_par_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef 
nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL1]], i32* [[B]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use_arg +; CHECK-SAME: (i32 [[ARG:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_use_arg..omp_par to void (i32*, i32*, ...)*), i32* [[ARG_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_seq_use_arg..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARG_ADDR:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..28(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..29(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; 
CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..30(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..31(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, 
i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..32(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: 
omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..33(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_3() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..34(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..35(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..36(i32* 
[[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @unmergable_3_omp_in_parallel..omp_par to void (i32*, i32*, ...)*), i32* [[A]], i32 [[CALL2]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_omp_in_parallel..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]], i32 [[CALL2:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK: omp_region.end4: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split.split: +; CHECK-NEXT: call void 
@.omp_outlined..39(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body5: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK: seq.par.merged2: +; CHECK-NEXT: call void @use(i32 [[CALL2]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK: omp_region.body5.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: br label [[OMP_REGION_END4]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: call void @use(i32 [[CALL2]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] ; -; Merge all parallel regions. -define dso_local void @merge_all() local_unnamed_addr { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 1, i32* %1, align 4 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_all..omp_par to void (i32*, i32*, ...)*), i32* nonnull %1) - %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_all..omp_par.1 to void (i32*, i32*, ...)*), i32* nonnull %1) - ret void -} - -define internal void @merge_all..omp_par.1(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 3, i32* %2, align 4 - ret void -} - -define internal void @merge_all..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 2, i32* %2, align 4 - ret void -} - - -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr - -declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr - -; void merge_none() { -; int a = 1; -; #pragma omp parallel -; { -; a = 2; -; } -; a = 3; -; #pragma omp parallel -; { -; a = 4; -; } -; } -; -; Does not merge parallel regions, in-between store -; instruction is unsafe to execute in parallel. -define dso_local void @merge_none() local_unnamed_addr { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 1, i32* %1, align 4 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par to void (i32*, i32*, ...)*), i32* nonnull %1) - store i32 3, i32* %1, align 4 - %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par.2 to void (i32*, i32*, ...)*), i32* nonnull %1) - ret void -} - -define internal void @merge_none..omp_par.2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 4, i32* %2, align 4 - ret void -} - -define internal void @merge_none..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 2, i32* %2, align 4 - ret void -} - -; void merge_some() { -; int a = 1; -; #pragma omp parallel -; { -; a = 2; -; } -; a = 3; -; #pragma omp parallel -; { -; a = 4; -; } -; #pragma omp parallel -; { -; a = 5; -; } -; } ; -; Do not merge first parallel region, due to the -; unsafe store, but merge the two next parallel -; regions. -define dso_local void @merge_some() local_unnamed_addr { - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 1, i32* %1, align 4 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par to void (i32*, i32*, ...)*), i32* nonnull %1) - store i32 3, i32* %1, align 4 - %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par.3 to void (i32*, i32*, ...)*), i32* nonnull %1) - %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par.4 to void (i32*, i32*, ...)*), i32* nonnull %1) - ret void -} - -define internal void @merge_some..omp_par.4(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 5, i32* %2, align 4 - ret void -} - -define internal void @merge_some..omp_par.3(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 4, i32* %2, align 4 - ret void -} - -define internal void @merge_some..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture %2) { - store i32 2, i32* %2, align 4 - ret void -} - -; void merge_cancellable_regions(int cancel1, int cancel2) -; { -; #pragma omp parallel -; { -; if(cancel1) { -; #pragma omp cancel parallel -; } -; } -; -; #pragma omp parallel -; { -; if (cancel2) { -; #pragma omp cancel parallel -; } -; } -; } +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void ; -; Merge correctly cancellable regions. -define dso_local void @merge_cancellable_regions(i32 %0, i32 %1) local_unnamed_addr { - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - store i32 %0, i32* %3, align 4 - store i32 %1, i32* %4, align 4 - %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* nonnull %3) - %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_cancellable_regions..omp_par.5 to void (i32*, i32*, ...)*), i32* nonnull %4) - ret void -} - -define internal void @merge_cancellable_regions..omp_par.5(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture readonly %2) { - %4 = load i32, i32* %2, align 4 - %5 = icmp eq i32 %4, 0 - br i1 %5, label %6, label %7 - -6: ; preds = %3 - ret void - -7: ; preds = %3 - %8 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - %9 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %8, i32 1) - ret void -} - -define internal void @merge_cancellable_regions..omp_par(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1, i32* nocapture readonly %2) { - %4 = load i32, i32* %2, align 4 - %5 = icmp eq i32 %4, 0 - br i1 %5, label %6, label %7 - -6: ; preds = %3 - ret void - -7: ; preds = %3 - %8 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - %9 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %8, i32 1) - ret void -} - -declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) local_unnamed_addr - - -!llvm.module.flags = !{!0} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{!2} -!2 = !{i64 2, i64 -1, i64 -1, i1 true} -; CHECK-LABEL: define {{[^@]+}}@merge_all() local_unnamed_addr { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 +; +; CHECK-LABEL: define 
{{[^@]+}}@.omp_outlined..38 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..40 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..41 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL2]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..40 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..41 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..42 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 
dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..44 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..45 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..43 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..44 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..45 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..48 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..46 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..47 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..48 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef 
nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..49 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: call void @use(i32 [[CALL1]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_all..omp_par.2 to void (i32*, i32*, ...)*), i32* [[TMP2]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_omp_in_parallel..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: -; CHECK-NEXT: br label [[DOTSPLIT_SPLIT:%.*]] -; CHECK: .split.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_all..omp_par.2 -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[TMP0:%.*]]) [[ATTR0:#.*]] { +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_in_parallel..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: @@ -229,12 +3023,86 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @merge_all..omp_par(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) +; CHECK-NEXT: call void @.omp_outlined..50(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @merge_all..omp_par.1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) -; CHECK-NEXT: br label [[DOTSPLIT:%.*]] -; CHECK: .split: +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..51(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..49 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..50 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..51 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone 
[[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_in_parallel() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1_omp_in_parallel..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @omp_in_parallel() +; CHECK-NEXT: call void @use(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..54 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_in_parallel..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..52(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..53(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK: omp.par.region.split: ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] @@ -242,65 +3110,134 @@ ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_all..omp_par.1 -; CHECK-SAME: 
(i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1:#.*]] { -; CHECK-NEXT: store i32 3, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..52 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..53 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..54 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..55 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_omp_set_num_threads..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_all..omp_par -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 2, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_omp_set_num_threads..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..56(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..57(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_none() local_unnamed_addr { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) -; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2]]) -; CHECK-NEXT: store i32 3, i32* [[TMP2]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @merge_none..omp_par.2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2]]) +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..55 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_none..omp_par.2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 4, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..56 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_none..omp_par -; CHECK-SAME: (i32* 
noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 2, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..57 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some() local_unnamed_addr { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) -; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 1, i32* [[TMP2]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2]]) -; CHECK-NEXT: store i32 3, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_set_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP0]]) [[ATTR4]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_some..omp_par.5 to void (i32*, i32*, ...)*), i32* [[TMP2]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1_omp_set_num_threads..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: -; CHECK-NEXT: br label [[DOTSPLIT_SPLIT:%.*]] -; CHECK: .split.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @omp_set_num_threads(i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..60 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par.5 -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[TMP0:%.*]]) [[ATTR0]] { +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_omp_set_num_threads..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: @@ -308,12 +3245,12 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @merge_some..omp_par.3(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) +; CHECK-NEXT: call void @.omp_outlined..58(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @merge_some..omp_par.4(i32* 
[[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0]]) -; CHECK-NEXT: br label [[DOTSPLIT:%.*]] -; CHECK: .split: +; CHECK-NEXT: call void @.omp_outlined..59(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK: omp.par.region.split: ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] @@ -321,50 +3258,61 @@ ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par.4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 5, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..58 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par.3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 4, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..59 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_some..omp_par -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP2:%.*]]) [[ATTR1]] { -; CHECK-NEXT: store i32 2, i32* [[TMP2]], align 4 +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..60 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions -; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) -; CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_num_threads() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) 
@def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[CALL1:%.*]] = call i32 (...) @def() +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[CALL1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..61 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par.6 to void (i32*, i32*, ...)*), i32* [[TMP4]], i32* [[TMP5]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_num_threads..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: -; CHECK-NEXT: br label [[DOTSPLIT_SPLIT:%.*]] -; CHECK: .split.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par.6 -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) [[ATTR0]] { +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_num_threads..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: @@ -372,12 +3320,12 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @merge_cancellable_regions..omp_par(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP0]]) +; CHECK-NEXT: call void @.omp_outlined..62(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) ; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @merge_cancellable_regions..omp_par.5(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: br label [[DOTSPLIT:%.*]] -; CHECK: .split: +; CHECK-NEXT: call void @.omp_outlined..63(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] ; CHECK: omp.par.region.split: ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] @@ -385,28 +3333,174 @@ ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par.5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP2:%.*]]) { -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; CHECK: 6: +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..61 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB1]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], 
i32 [[TMP8]], i32 noundef 1) +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..62 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[TMP0:%.*]], i32* noalias nocapture nofree readnone [[TMP1:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[TMP2:%.*]]) { -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; CHECK: 6: -; CHECK-NEXT: ret void -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB1]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP8]], i32 noundef 1) +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..63 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: 
call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..64 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @unmergable_1_merge_2_proc_bind..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@unmergable_1_merge_2_proc_bind..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load 
i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..65(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..66(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..64 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..65 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..66 +; 
CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_proc_bind() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @def() +; CHECK-NEXT: store i32 [[CALL]], i32* [[A]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1_proc_bind..omp_par to void (i32*, i32*, ...)*), i32* [[A]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..69 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1_proc_bind..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A:%.*]]) [[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..67(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..68(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..67 +; 
CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..68 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..69 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ;