diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -523,7 +523,7 @@
   /// State to track if we are in SPMD-mode, assumed or know, and why we decided
   /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
   /// false.
-  BooleanStateWithPtrSetVector<CallBase> SPMDCompatibilityTracker;
+  BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
 
   /// The __kmpc_target_init call in this kernel, if any. If we find more than
   /// one we abort as the kernel is malformed.
@@ -2821,6 +2821,12 @@
   AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
       : AAKernelInfo(IRP, A) {}
 
+  SmallPtrSet<Instruction *, 4> GuardedInstructions;
+
+  SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
+    return GuardedInstructions;
+  }
+
   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
     // This is a high-level transform that might change the constant arguments
@@ -3021,6 +3027,188 @@
       return false;
     }
 
+    auto CreateGuardedRegion = [&](Instruction *RegionStartI,
+                                   Instruction *RegionEndI) {
+      LoopInfo *LI = nullptr;
+      DominatorTree *DT = nullptr;
+      MemorySSAUpdater *MSU = nullptr;
+      using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+      BasicBlock *ParentBB = RegionStartI->getParent();
+      Function *Fn = ParentBB->getParent();
+      Module &M = *Fn->getParent();
+
+      // Create all the blocks and logic.
+      // ParentBB:
+      //    goto RegionCheckTidBB
+      // RegionCheckTidBB:
+      //    Tid = __kmpc_hardware_thread_id()
+      //    if (Tid != 0)
+      //        goto RegionBarrierBB
+      // RegionStartBB:
+      //    <execute instructions guarded>
+      //    goto RegionEndBB
+      // RegionEndBB:
+      //    <store escaping values to shared mem>
+      //    goto RegionBarrierBB
+      // RegionBarrierBB:
+      //    __kmpc_simple_barrier_spmd()
+      //    // second barrier is omitted if lacking escaping values.
+      //    <load escaping values from shared mem>
+      //    __kmpc_simple_barrier_spmd()
+      //    goto RegionExitBB
+      // RegionExitBB:
+      //    <execute rest of instructions>
+
+      BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
+                                           DT, LI, MSU, "region.guarded.end");
+      BasicBlock *RegionBarrierBB =
+          SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
+                     MSU, "region.barrier");
+      BasicBlock *RegionExitBB =
+          SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
+                     DT, LI, MSU, "region.exit");
+      BasicBlock *RegionStartBB =
+          SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
+
+      assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
+             "Expected a different CFG");
+
+      BasicBlock *RegionCheckTidBB = SplitBlock(
+          ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
+
+      // Register basic blocks with the Attributor.
+      A.registerManifestAddedBasicBlock(*RegionEndBB);
+      A.registerManifestAddedBasicBlock(*RegionBarrierBB);
+      A.registerManifestAddedBasicBlock(*RegionExitBB);
+      A.registerManifestAddedBasicBlock(*RegionStartBB);
+      A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
+
+      bool HasBroadcastValues = false;
+      // Find escaping outputs from the guarded region to outside users and
+      // broadcast their values to them.
+      for (Instruction &I : *RegionStartBB) {
+        SmallPtrSet<Instruction *, 4> OutsideUsers;
+        for (User *Usr : I.users()) {
+          Instruction &UsrI = *cast<Instruction>(Usr);
+          if (UsrI.getParent() != RegionStartBB)
+            OutsideUsers.insert(&UsrI);
+        }
+
+        if (OutsideUsers.empty())
+          continue;
+
+        HasBroadcastValues = true;
+
+        // Emit a global variable in shared memory to store the broadcasted
+        // value.
+        auto *SharedMem = new GlobalVariable(
+            M, I.getType(), /* IsConstant */ false,
+            GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
+            I.getName() + ".guarded.output.alloc", nullptr,
+            GlobalValue::NotThreadLocal,
+            static_cast<unsigned>(AddressSpace::Shared));
+
+        // Emit a store instruction to update the value.
+        new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
+
+        LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
+                                       I.getName() + ".guarded.output.load",
+                                       RegionBarrierBB->getTerminator());
+
+        // Emit a load instruction and replace uses of the output value.
+        for (Instruction *UsrI : OutsideUsers) {
+          assert(UsrI->getParent() == RegionExitBB &&
+                 "Expected escaping users in exit region");
+          UsrI->replaceUsesOfWith(&I, LoadI);
+        }
+      }
+
+      auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+      // Go to tid check BB in ParentBB.
+      const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
+      ParentBB->getTerminator()->eraseFromParent();
+      OpenMPIRBuilder::LocationDescription Loc(
+          InsertPointTy(ParentBB, ParentBB->end()), DL);
+      OMPInfoCache.OMPBuilder.updateToLocation(Loc);
+      auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc);
+      Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr);
+      BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
+
+      // Add check for Tid in RegionCheckTidBB
+      RegionCheckTidBB->getTerminator()->eraseFromParent();
+      OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
+          InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
+      OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
+      FunctionCallee HardwareTidFn =
+          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+              M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
+      Value *Tid =
+          OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
+      Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
+      OMPInfoCache.OMPBuilder.Builder
+          .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
+          ->setDebugLoc(DL);
+
+      // First barrier for synchronization, ensures main thread has updated
+      // values.
+      FunctionCallee BarrierFn =
+          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+              M, OMPRTL___kmpc_barrier_simple_spmd);
+      OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
+          RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
+      OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
+          ->setDebugLoc(DL);
+
+      // Second barrier ensures workers have read broadcast values.
+      if (HasBroadcastValues)
+        CallInst::Create(BarrierFn, {Ident, Tid}, "",
+                         RegionBarrierBB->getTerminator())
+            ->setDebugLoc(DL);
+    };
+
+    SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
+
+    for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+      BasicBlock *BB = GuardedI->getParent();
+      auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
+          IRPosition::function(*GuardedI->getFunction()), nullptr,
+          DepClassTy::NONE);
+      assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
+      auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
+      // Continue if instruction is already guarded.
+      if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
+        continue;
+
+      Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
+      for (Instruction &I : *BB) {
+        // If instruction I needs to be guarded update the guarded region
+        // bounds.
+        if (SPMDCompatibilityTracker.contains(&I)) {
+          CalleeAAFunction.getGuardedInstructions().insert(&I);
+          if (GuardedRegionStart)
+            GuardedRegionEnd = &I;
+          else
+            GuardedRegionStart = GuardedRegionEnd = &I;
+
+          continue;
+        }
+
+        // Instruction I does not need guarding, store
+        // any region found and reset bounds.
+        if (GuardedRegionStart) {
+          GuardedRegions.push_back(
+              std::make_pair(GuardedRegionStart, GuardedRegionEnd));
+          GuardedRegionStart = nullptr;
+          GuardedRegionEnd = nullptr;
+        }
+      }
+    }
+
+    for (auto &GR : GuardedRegions)
+      CreateGuardedRegion(GR.first, GR.second);
+
     // Adjust the global exec mode flag that tells the runtime what mode this
     // kernel is executed in.
     Function *Kernel = getAnchorScope();
@@ -3356,8 +3544,21 @@
       if (llvm::all_of(Objects,
                        [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
         return true;
+      // Check for AAHeapToStack moved objects which must not be guarded.
+      auto &HS = A.getAAFor<AAHeapToStack>(
+          *this, IRPosition::function(*I.getFunction()),
+          DepClassTy::REQUIRED);
+      if (llvm::all_of(Objects, [&HS](const Value *Obj) {
+            auto *CB = dyn_cast<CallBase>(Obj);
+            if (!CB)
+              return false;
+            return HS.isAssumedHeapToStack(*CB);
+          })) {
+        return true;
+      }
     }
-    // For now we give up on everything but stores.
+
+    // Insert instruction that needs guarding.
     SPMDCompatibilityTracker.insert(&I);
     return true;
   };
@@ -3371,6 +3572,9 @@
     if (!IsKernelEntry) {
       updateReachingKernelEntries(A);
       updateParallelLevels(A);
+
+      if (!ParallelLevels.isValidState())
+        SPMDCompatibilityTracker.indicatePessimisticFixpoint();
     }
 
     // Callback to check a call instruction.
@@ -3521,8 +3725,10 @@
       // If SPMDCompatibilityTracker is not fixed, we need to give up on the
      // idea we can run something unknown in SPMD-mode.
-      if (!SPMDCompatibilityTracker.isAtFixpoint())
+      if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+        SPMDCompatibilityTracker.indicatePessimisticFixpoint();
         SPMDCompatibilityTracker.insert(&CB);
+      }
 
       // We have updated the state for this unknown call properly, there won't
       // be any change so we indicate a fixpoint.
@@ -3565,6 +3771,7 @@
         case OMPScheduleType::DistributeChunked:
           break;
         default:
+          SPMDCompatibilityTracker.indicatePessimisticFixpoint();
           SPMDCompatibilityTracker.insert(&CB);
           break;
         };
@@ -3590,7 +3797,8 @@
         // We do not look into tasks right now, just give up.
         SPMDCompatibilityTracker.insert(&CB);
         ReachedUnknownParallelRegions.insert(&CB);
-        break;
+        indicatePessimisticFixpoint();
+        return;
       case OMPRTL___kmpc_alloc_shared:
       case OMPRTL___kmpc_free_shared:
         // Return without setting a fixpoint, to be resolved in updateImpl.
@@ -3599,7 +3807,8 @@
         // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
         // generally.
SPMDCompatibilityTracker.insert(&CB); - break; + indicatePessimisticFixpoint(); + return; } // All other OpenMP runtime calls will not reach parallel regions so they // can be safely ignored for now. Since it is a known OpenMP runtime call we diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -16,12 +16,16 @@ ;; ;; void no_state_machine_needed() { ;; #pragma omp target teams -;; no_parallel_region_in_here(); +;; { +;; no_parallel_region_in_here(); +;; unknown_no_openmp(); +;; } ;; } ;; ;; void simple_state_machine() { ;; #pragma omp target teams ;; { +;; unknown_no_openmp(); ;; #pragma omp parallel ;; { p0(); } ;; no_parallel_region_in_here(); @@ -38,6 +42,7 @@ ;; void simple_state_machine_interprocedural() { ;; #pragma omp target teams ;; { +;; unknown_no_openmp(); ;; simple_state_machine_interprocedural_before(); ;; no_parallel_region_in_here(); ;; #pragma omp parallel @@ -75,6 +80,7 @@ ;; void simple_state_machine_pure() { ;; #pragma omp target teams ;; { +;; unknown_no_openmp(); ;; #pragma omp parallel ;; { p0(); } ;; unknown_pure(); @@ -115,27 +121,22 @@ %struct.ident_t = type { i32, i32, i32, i32, i8* } -@"_openmp_kernel_static_glob_rd$ptr" = internal addrspace(3) global i8* undef @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@__omp_offloading_2c_389eb_no_state_machine_needed_l14_exec_mode = weak constant i8 1 -@__omp_offloading_2c_389eb_simple_state_machine_l19_exec_mode = weak constant i8 1 -@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35_exec_mode = weak constant i8 1 -@__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50_exec_mode = weak 
constant i8 1 -@__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61_exec_mode = weak constant i8 1 -@__omp_offloading_2c_389eb_simple_state_machine_pure_l72_exec_mode = weak constant i8 1 -@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86_exec_mode = weak constant i8 1 -@__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 +@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 @G = external global i32, align 4 -@V = external global i1, align 4 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_2c_389eb_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_l19_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50_exec_mode, i8* 
@__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_pure_l72_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86_exec_mode, i8* @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, i8* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" -; The second to last argument of __kmpc_target_init is set to false to indicate we do not need the generic runtime state machine. -; The last argument is also set to false due to SPMDization. -; No user code state machine is build because we do not need one. -define weak void @__omp_offloading_2c_389eb_no_state_machine_needed_l14() #0 { +define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 @@ -147,7 +148,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -155,25 +156,19 @@ ret void } -; Verify we will not store a constant true here even though initially all call sites pass `i1 true` for the second-to-last argument. -define internal i32 @__kmpc_target_init(%struct.ident_t*, i1, i1 %use_generic_state_machine, i1) { - store i1 %use_generic_state_machine, i1* @V - %call = call i32 @unknown() - ret i32 %call -} +declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @no_parallel_region_in_here() #8 + call void @no_parallel_region_in_here() #7 + call void @unknown_no_openmp() #8 ret void } -; Function Attrs: convergent nounwind define hidden void @no_parallel_region_in_here() #1 { entry: %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) @@ -191,14 +186,13 @@ ret void } -; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #2 +declare void @unknown_no_openmp() #2 + +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A user code state machine is build because we do need one. 
No fallback and only one pointer comparison are needed. -define weak void @__omp_offloading_2c_389eb_simple_state_machine_l19() #0 { +define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -210,7 +204,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -218,7 +212,6 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 @@ -227,32 +220,30 @@ %captured_vars_addrs1 = alloca [0 x i8*], align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + call void @unknown_no_openmp() #8 %0 = load i32*, i32** %.global_tid..addr, align 8 %1 = load i32, i32* %0, align 4 %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0) - call void @no_parallel_region_in_here() #8 + call void @no_parallel_region_in_here() #7 %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0) ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* 
noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent -declare void @p0() #3 +declare void @p0() #4 -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -262,7 +253,7 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #3 ret void } @@ -270,22 +261,19 @@ declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p1() #8 + call void @p1() #7 ret void } -; Function Attrs: convergent -declare void @p1() #3 +declare void @p1() #4 -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -295,13 +283,11 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A user code state machine is build because we do need one. No fallback and only two pointer comparison are needed. -define weak void @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35() #0 { +define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -313,7 +299,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -321,7 +307,6 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 @@ -329,17 +314,17 @@ %captured_vars_addrs = alloca [0 x i8*], align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @simple_state_machine_interprocedural_before() #8 - call void @no_parallel_region_in_here() #8 + call void @unknown_no_openmp() #8 + call void @simple_state_machine_interprocedural_before() #7 + call void @no_parallel_region_in_here() #7 %0 = load i32*, i32** %.global_tid..addr, align 8 %1 = load i32, i32* %0, align 4 %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0) - call void @simple_state_machine_interprocedural_after() #8 + call void @simple_state_machine_interprocedural_after() #7 ret void } -; Function Attrs: convergent nounwind define hidden void @simple_state_machine_interprocedural_before() #1 { entry: %captured_vars_addrs = alloca [0 x i8*], align 8 @@ -349,19 +334,17 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p1() #8 + call void @p1() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -371,11 +354,10 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; Function Attrs: convergent nounwind define hidden void @simple_state_machine_interprocedural_after() #1 { entry: %captured_vars_addrs = alloca [0 x i8*], align 8 @@ -385,9 +367,7 @@ ret void } -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A user code state machine is build because we do need one. A fallback indirect call and only two pointer comparison are needed. -define weak void @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50() #0 { +define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 @@ -399,7 +379,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -407,7 +387,6 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 @@ -420,25 +399,23 @@ %1 = load i32, i32* %0, align 4 %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0) - %3 = call i32 @unknown() #8 - %4 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %4, i64 0) + %call = call i32 @unknown() #7 + %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** + call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %3, i64 0) ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -448,26 +425,23 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; Function Attrs: convergent -declare i32 @unknown() #3 +declare i32 @unknown() #4 -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p1() #8 + call void @p1() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -477,13 +451,11 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A user code state machine is build because we do need one. No fallback and only one pointer comparison is needed. -define weak void @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61() #0 { +define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -495,7 +467,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -503,7 +475,6 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 @@ -516,25 +487,23 @@ %1 = load i32, i32* %0, align 4 %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0) - call void @unknown_no_openmp() #9 + call void @unknown_no_openmp() #8 %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0) ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -544,26 +513,21 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; Function Attrs: convergent -declare void @unknown_no_openmp() #5 - -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p1() #8 + call void @p1() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -573,13 +537,11 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A user code state machine is build because we do need one. No fallback and only one pointer comparison is needed. -define weak void @__omp_offloading_2c_389eb_simple_state_machine_pure_l72() #0 { +define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -591,7 +553,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -599,7 +561,6 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 @@ -608,29 +569,28 @@ %captured_vars_addrs1 = alloca [0 x i8*], align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + call void @unknown_no_openmp() #8 %0 = load i32*, i32** %.global_tid..addr, align 8 %1 = load i32, i32* %0, align 4 %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0) - call void @unknown_pure() #10 + call void @unknown_pure() #9 %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0) ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -640,26 +600,23 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; Function Attrs: convergent nounwind readonly willreturn -declare void @unknown_pure() #6 +declare void @unknown_pure() #5 -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p1() #8 + call void @p1() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -669,13 +626,11 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A user code state machine is build because we do need one. No fallback and no pointer comparison is needed. -define weak void @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86() #0 { +define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 @@ -687,7 +642,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -695,19 +650,17 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %call = call i32 @omp_get_thread_num() - call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #8 + %call = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #7 + call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7 ret void } -; Function Attrs: convergent nounwind define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 { entry: %a.addr = alloca i32, align 4 @@ -722,20 +675,17 @@ if.end: ; preds = %entry %1 = load i32, i32* %a.addr, align 4 %sub = sub nsw i32 %1, 1 - call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #8 - call void @simple_state_machine_interprocedural_nested_recursive_after_after() #8 + call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7 + call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7 br label %return return: ; preds = %if.end, %if.then ret void } -; Function Attrs: convergent -declare i32 @omp_get_thread_num() #3 +declare i32 @omp_get_thread_num(...) #4 -; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine. -; A pretty generic user code state machine is build because we do not know anything about the weak callee. -define weak void @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106() #0 { +define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 @@ -747,7 +697,7 @@ user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #2 + call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3 call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void @@ -755,45 +705,38 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @weak_callee_empty() #8 + call void @weak_callee_empty() #7 ret void } -; Function Attrs: convergent nounwind define weak hidden void @weak_callee_empty() #1 { entry: ret void } -; Function Attrs: convergent nounwind -declare void @__kmpc_end_single(%struct.ident_t*, i32) #7 +declare i32 @__kmpc_single(%struct.ident_t*, i32) #6 -; Function Attrs: convergent nounwind -declare i32 @__kmpc_single(%struct.ident_t*, i32) #7 +declare void @__kmpc_end_single(%struct.ident_t*, i32) #6 -; Function Attrs: convergent nounwind -declare void @__kmpc_barrier(%struct.ident_t*, i32) #7 +declare void @__kmpc_barrier(%struct.ident_t*, i32) #6 -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -803,23 +746,21 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -829,11 +770,10 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #3 ret void } -; Function Attrs: convergent nounwind define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 { entry: %captured_vars_addrs = alloca [0 x i8*], 
align 8 @@ -843,19 +783,17 @@ ret void } -; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: %.global_tid..addr = alloca i32*, align 8 %.bound_tid..addr = alloca i32*, align 8 store i32* %.global_tid., i32** %.global_tid..addr, align 8 store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - call void @p0() #8 + call void @p0() #7 ret void } -; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #4 { +define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 { entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 @@ -865,102 +803,91 @@ store i16 %0, i16* %.addr, align 2 store i32 %1, i32* %.addr1, align 4 call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #2 + call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #3 ret void } -attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -attributes #1 = { convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -attributes #2 = { nounwind } -attributes #3 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -attributes #4 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -attributes #5 = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -attributes #6 = { convergent nounwind readonly willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -attributes #7 = { convergent nounwind } -attributes #8 = { convergent } -attributes #9 = { convergent "llvm.assume"="omp_no_openmp" } -attributes #10 = { convergent nounwind readonly willreturn } +attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +attributes #3 = { nounwind } +attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +attributes #6 = { convergent nounwind } +attributes #7 = { convergent } +attributes #8 = { convergent "llvm.assume"="omp_no_openmp" } +attributes #9 = { convergent nounwind readonly willreturn } !omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7} !nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15} -!llvm.module.flags = !{!16, !17, !18, !20, !21} -!llvm.ident = !{!19} - -!0 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_interprocedural", i32 35, i32 2} -!1 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_no_openmp_attr", i32 61, i32 4} -!2 = !{i32 
0, i32 44, i32 231915, !"no_state_machine_needed", i32 14, i32 0} -!3 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_with_fallback", i32 50, i32 3} -!4 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_pure", i32 72, i32 5} -!5 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_interprocedural_nested_recursive", i32 86, i32 6} -!6 = !{i32 0, i32 44, i32 231915, !"no_state_machine_weak_callee", i32 106, i32 7} -!7 = !{i32 0, i32 44, i32 231915, !"simple_state_machine", i32 19, i32 1} -!8 = !{void ()* @__omp_offloading_2c_389eb_no_state_machine_needed_l14, !"kernel", i32 1} -!9 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_l19, !"kernel", i32 1} -!10 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35, !"kernel", i32 1} -!11 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50, !"kernel", i32 1} -!12 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61, !"kernel", i32 1} -!13 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_pure_l72, !"kernel", i32 1} -!14 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86, !"kernel", i32 1} -!15 = !{void ()* @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106, !"kernel", i32 1} +!llvm.module.flags = !{!16, !17, !18} + +!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +!1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +!2 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +!3 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +!4 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +!7 = 
!{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +!8 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +!9 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +!10 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +!11 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +!12 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +!13 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +!14 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +!15 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} !16 = !{i32 1, !"wchar_size", i32 4} -!17 = !{i32 7, !"PIC Level", i32 2} -!18 = !{i32 7, !"frame-pointer", i32 2} -!19 = !{!"clang version 13.0.0"} -!20 = !{i32 7, !"openmp", i32 50} -!21 = !{i32 7, !"openmp-device", i32 50} -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_no_state_machine_needed_l14 +!17 = !{i32 7, !"openmp", i32 50} +!18 = !{i32 7, !"openmp-device", i32 50} +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1:[0-9]+]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 
true) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (%struct.ident_t* noalias nocapture nofree nonnull readnone align 8 dereferenceable(24) [[TMP0:%.*]], i1 [[TMP1:%.*]], i1 [[USE_GENERIC_STATE_MACHINE:%.*]], i1 [[TMP2:%.*]]) { -; CHECK-NEXT: store i1 false, i1* @V, align 4 -; CHECK-NEXT: [[CALL:%.*]] = call i32 @unknown() -; CHECK-NEXT: ret i32 [[CALL]] -; -; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ ; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; 
CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; CHECK: omp_if.then: ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR2]] +; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR2]] +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -977,14 +904,14 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_l19 +; 
CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: @@ -1021,16 +948,16 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 
true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1039,6 +966,7 @@ ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) @@ -1048,17 +976,17 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @p0() #[[ATTR9:[0-9]+]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline 
norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1070,21 +998,21 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR8]] +; CHECK-NEXT: call void @p1() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1096,18 +1024,18 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define 
{{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35 +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: @@ -1150,16 +1078,16 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) 
#[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__4 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1167,6 +1095,7 @@ ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]] ; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 @@ -1176,18 +1105,18 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* 
@__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1198,17 +1127,17 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR8]] +; CHECK-NEXT: call void @p1() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1220,22 +1149,22 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; 
CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1246,14 +1175,14 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50 +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label 
[[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: @@ -1292,16 +1221,16 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__6 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1313,23 +1242,23 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, 
i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @unknown() #[[ATTR8]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8]] +; CHECK-NEXT: call void @p0() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1341,21 +1270,21 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; 
CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR8]] +; CHECK-NEXT: call void @p1() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1367,18 +1296,18 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61 +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; CHECK-SAME: () 
#[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: @@ -1415,16 +1344,16 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: 
convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__9 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1436,23 +1365,23 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8]] +; CHECK-NEXT: call void @p0() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: 
convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1464,21 +1393,21 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR8]] +; CHECK-NEXT: call void @p1() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1490,30 +1419,64 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; 
CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_pure_l72 +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; CHECK: worker_state_machine.begin: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; CHECK: worker_state_machine.finished: +; CHECK-NEXT: ret void +; CHECK: worker_state_machine.is_active.check: +; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; CHECK: worker_state_machine.parallel_region.check: +; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = 
icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; CHECK: worker_state_machine.parallel_region.execute: +; CHECK-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; CHECK: worker_state_machine.parallel_region.check1: +; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; CHECK: worker_state_machine.parallel_region.execute2: +; CHECK-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; CHECK: worker_state_machine.parallel_region.check3: +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; CHECK: worker_state_machine.parallel_region.end: +; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; CHECK: worker_state_machine.done.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; CHECK: thread.user_code.check: ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 
dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; CHECK-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__12 ; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1522,25 +1485,26 @@ ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 
noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8]] +; CHECK-NEXT: call void @p0() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1552,21 +1516,21 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void 
@__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR8]] +; CHECK-NEXT: call void @p1() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1578,18 +1542,18 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86 +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: @@ -1604,11 +1568,13 @@ ; CHECK: worker_state_machine.is_active.check: ; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; CHECK: worker_state_machine.parallel_region.execute: ; CHECK-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) ; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.check1: +; CHECK: worker_state_machine.parallel_region.fallback.execute: +; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) ; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] ; CHECK: worker_state_machine.parallel_region.end: ; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() @@ -1620,26 +1586,26 @@ ; CHECK-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__15 ; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR2]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]] ; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent 
noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1658,7 +1624,7 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1672,21 +1638,21 @@ ; CHECK: if.end: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR8]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR8]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106 +; CHECK: Function Attrs: convergent noinline norecurse nounwind +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 
false, i1 false, i1 true) ; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: @@ -1713,15 +1679,15 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__16 ; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1731,24 +1697,24 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@weak_callee_empty ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: 
; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8]] +; CHECK-NEXT: call void @p0() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1760,21 +1726,21 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8]] +; CHECK-NEXT: call void @p0() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: 
convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1786,22 +1752,22 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent nounwind +; CHECK: Function Attrs: convergent noinline nounwind ; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1812,17 +1778,17 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: 
Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR8]] +; CHECK-NEXT: call void @p0() #[[ATTR9]] ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: convergent noinline norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1834,63 +1800,57 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_no_state_machine_needed_l14 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1:[0-9]+]], i1 noundef false, i1 noundef true, i1 noundef true) 
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]] -; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLED-SAME: (%struct.ident_t* noalias nocapture nofree nonnull readnone align 8 dereferenceable(24) [[TMP0:%.*]], i1 [[TMP1:%.*]], i1 [[USE_GENERIC_STATE_MACHINE:%.*]], i1 [[TMP2:%.*]]) { -; CHECK-DISABLED-NEXT: store i1 false, i1* @V, align 4 -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() -; CHECK-DISABLED-NEXT: ret i32 [[CALL]] -; -; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 
dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR2]] -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; CHECK-DISABLED: omp_if.then: ; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] ; CHECK-DISABLED: omp_if.end: -; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR2]] +; 
CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: @@ -1907,26 +1867,26 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_l19 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) 
[[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -1935,6 +1895,7 @@ ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) @@ -1944,17 +1905,17 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent 
norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -1966,21 +1927,21 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; 
CHECK-DISABLED-NEXT: call void @p1() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -1992,30 +1953,30 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2023,6 +1984,7 @@ ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() 
#[[ATTR7]] ; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 @@ -2032,18 +1994,18 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: @@ -2054,17 +2016,17 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 
; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2076,22 +2038,22 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast 
(void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: @@ -2102,26 +2064,26 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void 
@__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2133,23 +2095,23 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = call i32 @unknown() #[[ATTR8]] -; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef 
bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2161,21 +2123,21 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* 
[[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2187,30 +2149,30 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; 
CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2222,23 +2184,23 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; 
CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2250,21 +2212,21 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2276,30 +2238,30 @@ ; CHECK-DISABLED-NEXT: store 
i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_pure_l72 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef false) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], 
i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2308,6 +2270,7 @@ ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) @@ -2316,17 +2279,17 
@@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2338,21 +2301,21 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; 
CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2364,40 +2327,40 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; CHECK-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]] ; CHECK-DISABLED-NEXT: call void 
@simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: @@ -2416,7 +2379,7 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: @@ -2430,32 +2393,32 @@ ; CHECK-DISABLED: if.end: ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR8]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]] ; CHECK-DISABLED-NEXT: br label [[RETURN]] ; CHECK-DISABLED: return: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106 +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; 
CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; CHECK-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 
4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2465,24 +2428,24 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2494,21 +2457,21 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; 
CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2520,22 +2483,22 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: @@ -2546,17 +2509,17 @@ ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 ; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] ; CHECK-DISABLED-NEXT: ret void ; ; -; CHECK-DISABLED: Function Attrs: convergent norecurse nounwind +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: @@ -2568,6 +2531,6 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: 
store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; CHECK-DISABLED-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -8,15 +8,20 @@ @G = external global i32 ;. +; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 +; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 +; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. 
define weak void @kernel0() #0 { -; CHECK-LABEL: define {{[^@]+}}@kernel0() -; CHECK: #[[ATTR0:[0-9]+]] { +; CHECK-LABEL: define {{[^@]+}}@kernel0 +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) -; CHECK-NEXT: call void @helper0() -; CHECK-NEXT: call void @helper1() -; CHECK-NEXT: call void @helper2() +; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] +; CHECK-NEXT: call void @helper1() #[[ATTR1]] +; CHECK-NEXT: call void @helper2() #[[ATTR1]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) ; CHECK-NEXT: ret void ; @@ -31,10 +36,10 @@ @kernel1_exec_mode = weak constant i8 1 define weak void @kernel1() #0 { -; CHECK-LABEL: define {{[^@]+}}@kernel1() -; CHECK: #[[ATTR0]] { +; CHECK-LABEL: define {{[^@]+}}@kernel1 +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) -; CHECK-NEXT: call void @helper1() +; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) ; CHECK-NEXT: ret void ; @@ -47,13 +52,13 @@ @kernel2_exec_mode = weak constant i8 1 define weak void @kernel2() #0 { -; CHECK-LABEL: define {{[^@]+}}@kernel2() -; CHECK: #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) -; CHECK-NEXT: call void @helper0() -; CHECK-NEXT: call void @helper1() -; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-LABEL: define {{[^@]+}}@kernel2 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: call void @helper0() #[[ATTR1]] +; CHECK-NEXT: call void @helper1() #[[ATTR1]] +; CHECK-NEXT: call void @helper2() #[[ATTR1]] +; CHECK-NEXT: 
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) ; CHECK-NEXT: ret void ; %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) @@ -65,8 +70,22 @@ } define internal void @helper0() { -; CHECK-LABEL: define {{[^@]+}}@helper0() {{#[0-9]+}} { +; CHECK-LABEL: define {{[^@]+}}@helper0 +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] +; CHECK: region.check.tid: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] +; CHECK: region.guarded: ; CHECK-NEXT: store i32 666, i32* @G, align 4 +; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] +; CHECK: region.guarded.end: +; CHECK-NEXT: br label [[REGION_BARRIER]] +; CHECK: region.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +; CHECK-NEXT: br label [[REGION_EXIT:%.*]] +; CHECK: region.exit: ; CHECK-NEXT: ret void ; %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() @@ -75,7 +94,8 @@ } define internal void @helper1() { -; CHECK-LABEL: define {{[^@]+}}@helper1() {{#[0-9]+}} { +; CHECK-LABEL: define {{[^@]+}}@helper1 +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: br label [[F:%.*]] ; CHECK: t: ; CHECK-NEXT: unreachable @@ -93,8 +113,22 @@ } define internal void @helper2() { -; CHECK-LABEL: define {{[^@]+}}@helper2() {{#[0-9]+}} { -; CHECK-NEXT: store i32 666, i32* @G +; CHECK-LABEL: define {{[^@]+}}@helper2 +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] +; CHECK: region.check.tid: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] +; CHECK: region.guarded: +; CHECK-NEXT: store i32 666, i32* @G, align 
4 +; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] +; CHECK: region.guarded.end: +; CHECK-NEXT: br label [[REGION_BARRIER]] +; CHECK: region.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +; CHECK-NEXT: br label [[REGION_EXIT:%.*]] +; CHECK: region.exit: ; CHECK-NEXT: ret void ; %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() @@ -117,9 +151,12 @@ !2 = !{void ()* @kernel0, !"kernel", i32 1} !3 = !{void ()* @kernel1, !"kernel", i32 1} !4 = !{void ()* @kernel2, !"kernel", i32 1} +; ;. ; CHECK: attributes #[[ATTR0]] = { "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } -; +; CHECK: attributes #[[ATTR1]] = { nounwind } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind } +;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel0, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll --- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -128,7 +128,8 @@ ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: ret void ; CHECK: f: -; CHECK-NEXT: unreachable +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void ; %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() %c = icmp eq i8 %isSPMD, 0 diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -23,6 +23,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true) ; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR0]] +; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true) ; CHECK-NEXT: ret void ; @@ -31,6 +32,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; @@ -38,6 +40,7 @@ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 true, i1 true) call void @foo() call void @bar() + call void @unknown_no_openmp() call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true) ret void } @@ -67,7 +70,7 @@ ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR0]] ; CHECK-NEXT: ret void ; @@ -75,7 +78,7 @@ ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR3:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR0]] ; CHECK-DISABLED-NEXT: ret void ; @@ -146,6 +149,8 @@ ; CHECK: declare void @__kmpc_free_shared(i8* nocapture, i64) declare void @__kmpc_free_shared(i8*, i64) +declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" + !llvm.dbg.cu = !{!0} 
!llvm.module.flags = !{!3, !4, !6, !7} !nvvm.annotations = !{!5} diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs ; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" @@ -13,15 +14,11 @@ ; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory ; CHECK-REMARKS-NOT: 6 bytes -; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef -; CHECK: [[SHARED_Y:@.+]] = internal addrspace(3) global [4 x i8] undef - -; CHECK: %{{.*}} = call i8* @__kmpc_alloc_shared({{.*}}) -; CHECK: call void @__kmpc_free_shared({{.*}}) define dso_local void @foo() { entry: %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) %x = call i8* @__kmpc_alloc_shared(i64 4) + call void @unknown_no_openmp() %x_on_stack = bitcast i8* %x to i32* %0 = bitcast i32* %x_on_stack to i8* call void @use(i8* %0) @@ -32,6 +29,7 @@ define void @bar() { %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + call void @unknown_no_openmp() call void @baz() call void @qux() call void @negative_qux_spmd() @@ -39,10 +37,10 @@ ret void } -; CHECK: call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* [[SHARED_X]], i32 0, i32 0) to i8*)) define internal void @baz() { entry: %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 true) + call 
void @unknown_no_openmp() %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %master, label %exit master: @@ -56,10 +54,10 @@ ret void } -; CHECK: call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* [[SHARED_Y]], i32 0, i32 0) to i8*)) define internal void @qux() { entry: %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) + call void @unknown_no_openmp() %0 = icmp eq i32 %call, -1 br i1 %0, label %master, label %exit master: @@ -76,6 +74,7 @@ define internal void @negative_qux_spmd() { entry: %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 true, i1 true) + call void @unknown_no_openmp() %0 = icmp eq i32 %call, -1 br i1 %0, label %master, label %exit master: @@ -110,6 +109,8 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} !nvvm.annotations = !{!7, !8} @@ -127,3 +128,105 @@ !10 = !DISubroutineType(types: !2) !11 = !DILocation(line: 5, column: 7, scope: !9) !12 = !DILocation(line: 5, column: 14, scope: !9) +;. +; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* +; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c" +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 32 +; CHECK: @[[Y:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +;. 
+; CHECK-LABEL: define {{[^@]+}}@foo() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; CHECK-NEXT: [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR1]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@bar() { +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @baz() +; CHECK-NEXT: call void @qux() +; CHECK-NEXT: call void @negative_qux_spmd() +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@baz() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true) +; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[MASTER:%.*]], label [[EXIT:%.*]] +; CHECK: master: +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR4]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@qux() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true) +; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: 
[[TMP0:%.*]] = icmp eq i32 [[CALL]], -1 +; CHECK-NEXT: br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]] +; CHECK: master: +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR4]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@negative_qux_spmd() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef true, i1 noundef true, i1 noundef true) +; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1 +; CHECK-NEXT: br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]] +; CHECK: master: +; CHECK-NEXT: [[Y:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 24) #[[ATTR1]], !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[Y]]) #[[ATTR4]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Y]], i64 noundef 24) #[[ATTR1]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@use.internalized +; CHECK-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define {{[^@]+}}@use +; CHECK-SAME: (i8* [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR1]] = { nounwind } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR4]] = { nounwind writeonly } +;. 
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") +; CHECK: [[META2:![0-9]+]] = !{} +; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META7:![0-9]+]] = !{void ()* @foo, !"kernel", i32 1} +; CHECK: [[META8:![0-9]+]] = !{void ()* @bar, !"kernel", i32 1} +; CHECK: [[DBG9]] = !DILocation(line: 5, column: 14, scope: !10) +; CHECK: [[META10:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +; CHECK: [[META11:![0-9]+]] = !DISubroutineType(types: !2) +;. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -49,6 +49,30 @@ ;; spmd_amenable(); ;; } ;; } +;; +;; void sequential_loop_to_shared_var_guarded() { +;; #pragma omp target teams +;; { +;; int x = 42; +;; for (int i = 0; i < 100; ++i) { +;; #pragma omp parallel +;; { +;; x++; +;; unknown(); +;; } +;; } +;; spmd_amenable(); +;; } +;; } +;; +;; void do_not_spmdize_target() { +;; #pragma omp target teams +;; { +;; // Incompatible parallel level, called both +;; // from parallel and target regions +;; unknown(); +;; } +;; } target triple = "nvptx64" @@ -56,33 +80,42 @@ @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@__omp_offloading_14_5896c35_sequential_loop_l5_exec_mode = weak constant i8 1 -@__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1 -@__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1 -@llvm.compiler.used = appending global [3 x i8*] [i8* @__omp_offloading_14_5896c35_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35_exec_mode], section "llvm.metadata" +@__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode = weak constant i8 1 +@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1 +@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1 +@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1 +@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode = weak constant i8 1 
+@llvm.compiler.used = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata" ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 -; CHECK: @[[__OMP_OFFLOADING_14_5896C35_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 -; CHECK: @[[__OMP_OFFLOADING_14_5896C35_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 -; CHECK: @[[__OMP_OFFLOADING_14_5896C35_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 -; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x i8*] [i8* @__omp_offloading_14_5896c35_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35_exec_mode], section "llvm.metadata" +; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 +; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 +; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 +; CHECK: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 +; CHECK: 
@[[__OMP_OFFLOADING_14_A34CA11_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata" ; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; CHECK: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 -; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_5896C35_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_5896C35_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_5896C35_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x i8*] [i8* @__omp_offloading_14_5896c35_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35_exec_mode], section "llvm.metadata" +; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLED: 
@[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLED: @[[__OMP_OFFLOADING_14_A34CA11_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata" ; CHECK-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; CHECK-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 ; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; CHECK-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; CHECK-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; CHECK-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. 
-define weak void @__omp_offloading_14_5896c35_sequential_loop_l5() #0 { -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_5896c35_sequential_loop_l5 +define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_l5 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -99,7 +132,7 @@ ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_5896c35_sequential_loop_l5 +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_l5 ; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 @@ -191,9 +224,9 @@ ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 -; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -219,7 +252,7 @@ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 -; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-DISABLED: for.end: ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void @@ -250,7 +283,7 @@ %4 = load i32, i32* %i, align 4 %inc = add nsw i32 %4, 1 store i32 %inc, i32* %i, align 4 - br label %for.cond, !llvm.loop !9 + br 
label %for.cond, !llvm.loop !13 for.end: ; preds = %for.cond call void @spmd_amenable() #4 @@ -263,7 +296,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 @@ -337,8 +370,8 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) -define weak void @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20() #0 { -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20 +define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -355,7 +388,7 @@ ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20 +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 @@ -432,7 +465,7 @@ ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR4]] +; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] ; CHECK-NEXT: store i32 0, i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: @@ -448,9 +481,9 @@ ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[INC:%.*]] = 
add nsw i32 [[TMP4]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 -; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR4]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 @@ -479,7 +512,7 @@ ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 -; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-DISABLED: for.end: ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] ; CHECK-DISABLED-NEXT: ret void @@ -513,7 +546,7 @@ %4 = load i32, i32* %i, align 4 %inc = add nsw i32 %4, 1 store i32 %inc, i32* %i, align 4 - br label %for.cond, !llvm.loop !11 + br label %for.cond, !llvm.loop !15 for.end: ; preds = %for.cond call void @spmd_amenable() #4 @@ -531,7 +564,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR5]] +; CHECK-NEXT: call void @unknown() #[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 @@ -595,8 +628,8 @@ declare void @__kmpc_free_shared(i8* nocapture, i64) #3 -define weak void @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35() #0 { -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35 +define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; 
CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -613,7 +646,7 @@ ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; -; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35 +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 @@ -705,9 +738,9 @@ ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 -; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR4]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 @@ -735,7 +768,7 @@ ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 -; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-DISABLED: for.end: ; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] ; CHECK-DISABLED-NEXT: ret void @@ -771,7 +804,7 @@ %6 = load i32, i32* %i, align 4 %inc = add nsw i32 %6, 1 store i32 %inc, i32* %i, align 4 - br label %for.cond, !llvm.loop !12 + br label %for.cond, !llvm.loop !16 for.end: ; preds = %for.cond call void @spmd_amenable() #4 @@ -790,7 +823,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-NEXT: call void @unknown() #[[ATTR5]] +; CHECK-NEXT: call void @unknown() 
#[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -875,6 +908,430 @@ ret void } +define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK: user_code.entry: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; CHECK-NEXT: ret void +; CHECK: worker.exit: +; CHECK-NEXT: ret void +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; 
CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; CHECK-DISABLED: worker_state_machine.begin: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; CHECK-DISABLED: worker_state_machine.finished: +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker_state_machine.is_active.check: +; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; CHECK-DISABLED: worker_state_machine.parallel_region.check: +; CHECK-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; CHECK-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; CHECK-DISABLED: worker_state_machine.parallel_region.execute: +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute: +; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; CHECK-DISABLED: worker_state_machine.parallel_region.end: +; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() +; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; CHECK-DISABLED: worker_state_machine.done.barrier: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; CHECK-DISABLED: thread.user_code.check: +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +entry: + %.zero.addr = alloca i32, align 4 + %.threadid_temp. 
= alloca i32, align 4 + store i32 0, i32* %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %exec_user_code = icmp eq i32 %0, -1 + br i1 %exec_user_code, label %user_code.entry, label %worker.exit + +user_code.entry: ; preds = %entry + %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) + store i32 %1, i32* %.threadid_temp., align 4 + call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 + call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + ret void + +worker.exit: ; preds = %entry + ret void +} + +define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__6 +; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32* +; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] +; CHECK: region.check.tid: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] +; CHECK: region.guarded: +; CHECK-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4 +; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] +; CHECK: region.guarded.end: +; 
CHECK-NEXT: br label [[REGION_BARRIER]] +; CHECK: region.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-NEXT: br label [[REGION_EXIT:%.*]] +; CHECK: region.exit: +; CHECK-NEXT: store i32 0, i32* [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 100 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +; CHECK-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP3]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP4]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP5]], i64 noundef 1) +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: ret void +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) 
#[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32* +; CHECK-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4 +; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]] +; CHECK-DISABLED: for.cond: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK-DISABLED: for.body: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +; CHECK-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP1]], align 8 +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP2]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 1) +; CHECK-DISABLED-NEXT: br label [[FOR_INC:%.*]] +; CHECK-DISABLED: for.inc: +; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-DISABLED-NEXT: 
[[INC:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 +; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-DISABLED: for.end: +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: ret void +; +entry: + %.global_tid..addr = alloca i32*, align 8 + %.bound_tid..addr = alloca i32*, align 8 + %i = alloca i32, align 4 + %captured_vars_addrs = alloca [1 x i8*], align 8 + store i32* %.global_tid., i32** %.global_tid..addr, align 8 + store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %x = call i8* @__kmpc_alloc_shared(i64 4) + %x_on_stack = bitcast i8* %x to i32* + store i32 42, i32* %x_on_stack, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 + %2 = bitcast i32* %x_on_stack to i8* + store i8* %2, i8** %1, align 8 + %3 = load i32*, i32** %.global_tid..addr, align 8 + %4 = load i32, i32* %3, align 4 + %5 = bitcast [1 x i8*]* %captured_vars_addrs to i8** + call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %4, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %5, i64 1) + br label %for.inc + +for.inc: ; preds = %for.body + %6 = load i32, i32* %i, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond, !llvm.loop !17 + +for.end: ; preds = %for.cond + call void @spmd_amenable() #4 + call void @__kmpc_free_shared(i8* %x, i64 4) + ret void +} + +define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7 
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4 +; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: ret void +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 +; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: ret void +; +entry: + %.global_tid..addr = alloca i32*, align 8 + %.bound_tid..addr = alloca i32*, align 8 + %x.addr = alloca i32*, align 8 + store i32* %.global_tid., i32** %.global_tid..addr, align 8 + store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + store i32* %x, i32** %x.addr, align 8 + %0 = load i32*, i32** %x.addr, align 8 + %1 = load i32, i32* %0, align 4 + %inc = add nsw 
i32 %1, 1 + store i32 %inc, i32* %0, align 4 + call void @unknown() #5 + ret void +} + +define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-NEXT: ret void +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = load 
i8**, i8*** [[GLOBAL_ARGS]], align 8 +; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +entry: + %.addr = alloca i16, align 2 + %.addr1 = alloca i32, align 4 + %.zero.addr = alloca i32, align 4 + %global_args = alloca i8**, align 8 + store i32 0, i32* %.zero.addr, align 4 + store i16 %0, i16* %.addr, align 2 + store i32 %1, i32* %.addr1, align 4 + call void @__kmpc_get_shared_variables(i8*** %global_args) + %2 = load i8**, i8*** %global_args, align 8 + %3 = getelementptr inbounds i8*, i8** %2, i64 0 + %4 = bitcast i8** %3 to i32** + %5 = load i32*, i32** %4, align 8 + call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %5) #3 + ret void +} + +define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; CHECK: worker_state_machine.begin: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load 
i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; CHECK: worker_state_machine.finished: +; CHECK-NEXT: ret void +; CHECK: worker_state_machine.is_active.check: +; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; CHECK: worker_state_machine.parallel_region.fallback.execute: +; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; CHECK: worker_state_machine.parallel_region.end: +; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; CHECK: worker_state_machine.done.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; CHECK: thread.user_code.check: +; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK: user_code.entry: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-NEXT: ret void +; CHECK: worker.exit: +; CHECK-NEXT: ret void +; +; CHECK-DISABLED-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; CHECK-DISABLED: worker_state_machine.begin: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; CHECK-DISABLED: worker_state_machine.finished: +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker_state_machine.is_active.check: +; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; CHECK-DISABLED: worker_state_machine.parallel_region.fallback.execute: +; CHECK-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; CHECK-DISABLED: worker_state_machine.parallel_region.end: +; 
CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() +; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; CHECK-DISABLED: worker_state_machine.done.barrier: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; CHECK-DISABLED: thread.user_code.check: +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +entry: + %.zero.addr = alloca i32, align 4 + %.threadid_temp. = alloca i32, align 4 + store i32 0, i32* %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %exec_user_code = icmp eq i32 %0, -1 + br i1 %exec_user_code, label %user_code.entry, label %worker.exit + +user_code.entry: ; preds = %entry + %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) + store i32 %1, i32* %.threadid_temp., align 4 + call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #3 + call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + ret void + +worker.exit: ; preds = %entry + ret void +} + +define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8 +; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: ret void +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: ret void +; +entry: + %.global_tid..addr = alloca i32*, align 8 + %.bound_tid..addr = alloca i32*, align 8 + store i32* %.global_tid., i32** %.global_tid..addr, align 8 + store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + call void @unknown() #5 + ret void +} + attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } @@ -882,30 +1339,36 @@ attributes #4 = { convergent "llvm.assume"="ompx_spmd_amenable" } attributes #5 = { convergent } 
-!omp_offload.info = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5} -!llvm.module.flags = !{!6, !7, !8} - -!0 = !{i32 0, i32 20, i32 92892213, !"sequential_loop_to_stack_var", i32 20, i32 1} -!1 = !{i32 0, i32 20, i32 92892213, !"sequential_loop", i32 5, i32 0} -!2 = !{i32 0, i32 20, i32 92892213, !"sequential_loop_to_shared_var", i32 35, i32 2} -!3 = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_l5, !"kernel", i32 1} -!4 = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -!5 = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -!6 = !{i32 1, !"wchar_size", i32 4} -!7 = !{i32 7, !"openmp", i32 50} -!8 = !{i32 7, !"openmp-device", i32 50} -!9 = distinct !{!9, !10} -!10 = !{!"llvm.loop.mustprogress"} -!11 = distinct !{!11, !10} -!12 = distinct !{!12, !10} +!omp_offload.info = !{!0, !1, !2, !3, !4} +!nvvm.annotations = !{!5, !6, !7, !8, !9} +!llvm.module.flags = !{!10, !11, !12} + +!0 = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1} +!1 = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0} +!2 = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var", i32 35, i32 2} +!3 = !{i32 0, i32 20, i32 171231761, !"do_not_spmdize_target", i32 65, i32 4} +!4 = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} +!5 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_l5, !"kernel", i32 1} +!6 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +!7 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +!8 = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +!9 = !{void ()* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65, !"kernel", i32 1} +!10 = !{i32 1, !"wchar_size", i32 4} +!11 = !{i32 7, !"openmp", i32 50} +!12 = !{i32 7, !"openmp-device", i32 
50} +!13 = distinct !{!13, !14} +!14 = !{!"llvm.loop.mustprogress"} +!15 = distinct !{!15, !14} +!16 = distinct !{!16, !14} +!17 = distinct !{!17, !14} ;. ; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK: attributes #[[ATTR3]] = { nounwind } -; CHECK: attributes #[[ATTR4]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; CHECK: attributes #[[ATTR5]] = { convergent } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind } +; CHECK: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; CHECK: attributes #[[ATTR6]] = { convergent } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } @@ -915,31 +1378,41 @@ ; CHECK-DISABLED: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; CHECK-DISABLED: attributes #[[ATTR6]] = { convergent } ;. 
-; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 92892213, !"sequential_loop_to_stack_var", i32 20, i32 1} -; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 92892213, !"sequential_loop", i32 5, i32 0} -; CHECK: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 92892213, !"sequential_loop_to_shared_var", i32 35, i32 2} -; CHECK: [[META3:![0-9]+]] = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_l5, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; CHECK: [[META5:![0-9]+]] = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; CHECK: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[LOOP9]] = distinct !{!9, !10} -; CHECK: [[META10:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; CHECK: [[LOOP11]] = distinct !{!11, !10} -; CHECK: [[LOOP12]] = distinct !{!12, !10} +; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0} +; CHECK: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var", i32 35, i32 2} +; CHECK: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"do_not_spmdize_target", i32 65, i32 4} +; CHECK: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} +; CHECK: [[META5:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_l5, !"kernel", i32 1} +; CHECK: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; CHECK: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; CHECK: [[META8:![0-9]+]] = !{void ()* 
@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; CHECK: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65, !"kernel", i32 1} +; CHECK: [[META10:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META11:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META12:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[LOOP13]] = distinct !{!13, !14} +; CHECK: [[META14:![0-9]+]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[LOOP15]] = distinct !{!15, !14} +; CHECK: [[LOOP16]] = distinct !{!16, !14} +; CHECK: [[LOOP17]] = distinct !{!17, !14} ;. -; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 92892213, !"sequential_loop_to_stack_var", i32 20, i32 1} -; CHECK-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 92892213, !"sequential_loop", i32 5, i32 0} -; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 92892213, !"sequential_loop_to_shared_var", i32 35, i32 2} -; CHECK-DISABLED: [[META3:![0-9]+]] = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_l5, !"kernel", i32 1} -; CHECK-DISABLED: [[META4:![0-9]+]] = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; CHECK-DISABLED: [[META5:![0-9]+]] = !{void ()* @__omp_offloading_14_5896c35_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -; CHECK-DISABLED: [[META7:![0-9]+]] = !{i32 7, !"openmp", i32 50} -; CHECK-DISABLED: [[META8:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK-DISABLED: [[LOOP9]] = distinct !{!9, !10} -; CHECK-DISABLED: [[META10:![0-9]+]] = !{!"llvm.loop.mustprogress"} -; CHECK-DISABLED: [[LOOP11]] = distinct !{!11, !10} -; CHECK-DISABLED: [[LOOP12]] = distinct !{!12, !10} +; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1} +; CHECK-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", 
i32 5, i32 0} +; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var", i32 35, i32 2} +; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"do_not_spmdize_target", i32 65, i32 4} +; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} +; CHECK-DISABLED: [[META5:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_l5, !"kernel", i32 1} +; CHECK-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; CHECK-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; CHECK-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; CHECK-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65, !"kernel", i32 1} +; CHECK-DISABLED: [[META10:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK-DISABLED: [[META11:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK-DISABLED: [[META12:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK-DISABLED: [[LOOP13]] = distinct !{!13, !14} +; CHECK-DISABLED: [[META14:![0-9]+]] = !{!"llvm.loop.mustprogress"} +; CHECK-DISABLED: [[LOOP15]] = distinct !{!15, !14} +; CHECK-DISABLED: [[LOOP16]] = distinct !{!16, !14} +; CHECK-DISABLED: [[LOOP17]] = distinct !{!17, !14} ;.